Use of org.apache.cassandra.utils.concurrent.Refs in project cassandra by apache.
The class StreamSession, method getSSTableSectionsForRanges.
@VisibleForTesting
public static List<SSTableStreamingSections> getSSTableSectionsForRanges(Collection<Range<Token>> ranges, Collection<ColumnFamilyStore> stores, long overriddenRepairedAt, UUID pendingRepair) {
    Refs<SSTableReader> refs = new Refs<>();
    try {
        for (ColumnFamilyStore cfStore : stores) {
            final List<Range<PartitionPosition>> keyRanges = new ArrayList<>(ranges.size());
            for (Range<Token> range : ranges)
                keyRanges.add(Range.makeRowRange(range));
            refs.addAll(cfStore.selectAndReference(view -> {
                Set<SSTableReader> sstables = Sets.newHashSet();
                SSTableIntervalTree intervalTree = SSTableIntervalTree.build(view.select(SSTableSet.CANONICAL));
                Predicate<SSTableReader> predicate;
                if (pendingRepair == ActiveRepairService.NO_PENDING_REPAIR) {
                    predicate = Predicates.alwaysTrue();
                } else {
                    predicate = s -> s.isPendingRepair() && s.getSSTableMetadata().pendingRepair.equals(pendingRepair);
                }
                for (Range<PartitionPosition> keyRange : keyRanges) {
                    for (SSTableReader sstable : Iterables.filter(View.sstablesInBounds(keyRange.left, keyRange.right, intervalTree), predicate)) {
                        sstables.add(sstable);
                    }
                }
                if (logger.isDebugEnabled())
                    logger.debug("ViewFilter for {}/{} sstables", sstables.size(), Iterables.size(view.select(SSTableSet.CANONICAL)));
                return sstables;
            }).refs);
        }
        List<SSTableStreamingSections> sections = new ArrayList<>(refs.size());
        for (SSTableReader sstable : refs) {
            long repairedAt = overriddenRepairedAt;
            if (overriddenRepairedAt == ActiveRepairService.UNREPAIRED_SSTABLE)
                repairedAt = sstable.getSSTableMetadata().repairedAt;
            sections.add(new SSTableStreamingSections(refs.get(sstable), sstable.getPositionsForRanges(ranges), sstable.estimatedKeysForRanges(ranges), repairedAt));
        }
        return sections;
    } catch (Throwable t) {
        refs.release();
        throw t;
    }
}
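Note the shape of the error handling here: refs is released only on the failure path. On success, the acquired references travel out inside the returned SSTableStreamingSections, and the caller becomes responsible for releasing them. A minimal, self-contained sketch of that acquire-all-or-release-all idiom (the Ref interface below is a hypothetical stand-in, not a Cassandra API):

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.function.Supplier;

public class AcquireAllSketch {
    // Hypothetical stand-in for a ref-counted resource such as an SSTableReader reference.
    interface Ref { void release(); }

    // Acquire every ref or none: on any failure, release what was taken so far and rethrow.
    static List<Ref> acquireAll(Collection<Supplier<Ref>> sources) {
        List<Ref> acquired = new ArrayList<>();
        try {
            for (Supplier<Ref> source : sources)
                acquired.add(source.get());  // may throw part-way through the loop
            return acquired;                 // success: ownership passes to the caller
        } catch (Throwable t) {
            for (Ref ref : acquired)
                ref.release();               // failure: undo the references already taken
            throw t;
        }
    }
}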
Use of org.apache.cassandra.utils.concurrent.Refs in project cassandra by apache.
The class CompactionAllocationTest, method testIndexingWidePartitions.
private static void testIndexingWidePartitions(String name, int numSSTable, int sstablePartitions, IndexDef... indexes) throws Throwable {
    String ksname = "ks_" + name.toLowerCase();
    SchemaLoader.createKeyspace(ksname, KeyspaceParams.simple(1), CreateTableStatement.parse("CREATE TABLE tbl (k text, c text, v1 text, v2 text, v3 text, v4 text, PRIMARY KEY (k, c))", ksname).build());
    ColumnFamilyStore cfs = Schema.instance.getColumnFamilyStoreInstance(Schema.instance.getTableMetadata(ksname, "tbl").id);
    Assert.assertNotNull(cfs);
    cfs.disableAutoCompaction();
    int rowWidth = 100;
    int rowsPerPartition = 1000;
    measure(new Workload() {
        @SuppressWarnings("UnstableApiUsage")
        public void setup() {
            cfs.disableAutoCompaction();
            String insert = String.format("INSERT INTO %s.%s (k, c, v1, v2, v3, v4) VALUES (?, ?, ?, ?, ?, ?)", ksname, "tbl");
            for (int f = 0; f < numSSTable; f++) {
                for (int p = 0; p < sstablePartitions; p++) {
                    String key = String.format("%08d", (f * sstablePartitions) + p);
                    for (int r = 0; r < rowsPerPartition; r++) {
                        QueryProcessor.executeInternal(insert, key, makeRandomString(6, -1), makeRandomString(rowWidth >> 2), makeRandomString(rowWidth >> 2), makeRandomString(rowWidth >> 2), makeRandomString(rowWidth >> 2));
                    }
                }
                cfs.forceBlockingFlush();
            }
            for (IndexDef index : indexes) {
                QueryProcessor.executeInternal(String.format(index.cql, index.name, ksname, "tbl"));
                while (!cfs.indexManager.getBuiltIndexNames().contains(index.name))
                    Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            }
            Assert.assertEquals(numSSTable, cfs.getLiveSSTables().size());
        }

        public ColumnFamilyStore getCfs() {
            return cfs;
        }

        public List<Runnable> getReads() {
            return new ArrayList<>();
        }

        public String name() {
            return name;
        }

        public int executeReads() {
            // return 1 to avoid divide by zero error
            return 1;
        }

        public void executeCompactions() {
            logger.info("Starting index re-build");
            try (ColumnFamilyStore.RefViewFragment viewFragment = cfs.selectAndReference(View.selectFunction(SSTableSet.CANONICAL));
                 Refs<SSTableReader> sstables = viewFragment.refs) {
                Set<Index> indexes = new HashSet<>(cfs.indexManager.listIndexes());
                SecondaryIndexBuilder builder = new CollatedViewIndexBuilder(cfs, indexes, new ReducingKeyIterator(sstables), ImmutableSet.copyOf(sstables));
                builder.build();
            }
            logger.info("Index re-build complete");
        }

        public int[] getSSTableStats() {
            int numPartitions = cfs.getLiveSSTables().stream().mapToInt(sstable -> Ints.checkedCast(sstable.getSSTableMetadata().estimatedPartitionSize.count())).sum();
            int numRows = cfs.getLiveSSTables().stream().mapToInt(sstable -> Ints.checkedCast(sstable.getSSTableMetadata().totalRows)).sum();
            return new int[] { numPartitions, numRows };
        }
    });
}
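The detail worth noting in executeCompactions() is that viewFragment.refs is named as its own resource in the try-with-resources header, so the refs are released first and the fragment closed second, in reverse declaration order, even if the index build throws. A self-contained sketch of that pattern, with hypothetical Fragment/Refs stand-ins rather than the Cassandra classes:

import java.io.Closeable;

class TryWithRefsSketch {
    // Hypothetical stand-ins for ColumnFamilyStore.RefViewFragment and Refs.
    static class Refs implements Closeable {
        public void close() { /* release the referenced readers */ }
    }
    static class Fragment implements AutoCloseable {
        final Refs refs = new Refs();
        public void close() { /* drop the view fragment */ }
    }

    static void rebuild() {
        try (Fragment fragment = new Fragment();
             Refs refs = fragment.refs) {
            // work against the referenced readers; if this throws,
            // refs.close() runs first, then fragment.close()
        }
    }
}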
Use of org.apache.cassandra.utils.concurrent.Refs in project cassandra by apache.
The class ViewBuilderTask, method call.
public Long call() {
    String ksName = baseCfs.metadata.keyspace;
    if (prevToken == null)
        logger.debug("Starting new view build for range {}", range);
    else
        logger.debug("Resuming view build for range {} from token {} with {} covered keys", range, prevToken, keysBuilt);
    /*
     * It's possible for view building to start before the MV creation has propagated to the other nodes. For this
     * reason we should wait for the schema to converge before attempting to send any view mutations to other nodes,
     * or else face an UnknownTableException upon Mutation deserialization on the nodes that haven't yet processed
     * the schema change.
     */
    boolean schemaConverged = Gossiper.instance.waitForSchemaAgreement(10, TimeUnit.SECONDS, () -> this.isStopped);
    if (!schemaConverged)
        logger.warn("Failed to get schema to converge before building view {}.{}", baseCfs.keyspace.getName(), view.name);
    Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>> function;
    function = org.apache.cassandra.db.lifecycle.View.select(SSTableSet.CANONICAL, s -> range.intersects(s.getBounds()));
    try (ColumnFamilyStore.RefViewFragment viewFragment = baseCfs.selectAndReference(function);
         Refs<SSTableReader> sstables = viewFragment.refs;
         ReducingKeyIterator keyIter = new ReducingKeyIterator(sstables)) {
        PeekingIterator<DecoratedKey> iter = Iterators.peekingIterator(keyIter);
        while (!isStopped && iter.hasNext()) {
            DecoratedKey key = iter.next();
            Token token = key.getToken();
            // skip tokens already built or not present in range
            if (range.contains(token) && (prevToken == null || token.compareTo(prevToken) > 0)) {
                buildKey(key);
                ++keysBuilt;
                // build other keys sharing the same token
                while (iter.hasNext() && iter.peek().getToken().equals(token)) {
                    key = iter.next();
                    buildKey(key);
                    ++keysBuilt;
                }
                if (keysBuilt % ROWS_BETWEEN_CHECKPOINTS == 1)
                    SystemKeyspace.updateViewBuildStatus(ksName, view.name, range, token, keysBuilt);
                prevToken = token;
            }
        }
    }
    finish();
    return keysBuilt;
}
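Two details of the loop deserve a closer look: a checkpoint at token T means every key up to and including T has been built, which is why all keys sharing a token are built before T is recorded, and prevToken lets a restarted task skip work already covered by an earlier run. A self-contained sketch of that resume-and-checkpoint shape (process and checkpoint are hypothetical hooks, not Cassandra APIs):

import java.util.Iterator;
import java.util.function.LongConsumer;

class ResumableBuildSketch {
    static long run(Iterator<Long> tokens, Long resumeFrom, int checkpointEvery,
                    LongConsumer process, LongConsumer checkpoint) {
        long built = 0;
        Long prev = resumeFrom; // null on a fresh build
        while (tokens.hasNext()) {
            long token = tokens.next();
            if (prev != null && token <= prev)
                continue;                     // already covered by an earlier run
            process.accept(token);
            if (++built % checkpointEvery == 1)
                checkpoint.accept(token);     // everything up to and including token is built
            prev = token;
        }
        return built;
    }
}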
Use of org.apache.cassandra.utils.concurrent.Refs in project cassandra by apache.
The class Scrubber, method scrub.
public void scrub() {
    List<SSTableReader> finished = new ArrayList<>();
    outputHandler.output(String.format("Scrubbing %s (%s)", sstable, FBUtilities.prettyPrintMemory(dataFile.length())));
    try (SSTableRewriter writer = SSTableRewriter.construct(cfs, transaction, false, sstable.maxDataAge);
         Refs<SSTableReader> refs = Refs.ref(Collections.singleton(sstable))) {
        nextIndexKey = indexAvailable() ? ByteBufferUtil.readWithShortLength(indexFile) : null;
        if (indexAvailable()) {
            // throw away variable so we don't have a side effect in the assert
            long firstRowPositionFromIndex = rowIndexEntrySerializer.deserializePositionAndSkip(indexFile);
            assert firstRowPositionFromIndex == 0 : firstRowPositionFromIndex;
        }
        StatsMetadata metadata = sstable.getSSTableMetadata();
        writer.switchWriter(CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize, metadata.repairedAt, metadata.pendingRepair, metadata.isTransient, sstable, transaction));
        DecoratedKey prevKey = null;
        while (!dataFile.isEOF()) {
            if (scrubInfo.isStopRequested())
                throw new CompactionInterruptedException(scrubInfo.getCompactionInfo());
            long partitionStart = dataFile.getFilePointer();
            outputHandler.debug("Reading row at " + partitionStart);
            DecoratedKey key = null;
            try {
                ByteBuffer raw = ByteBufferUtil.readWithShortLength(dataFile);
                if (!cfs.metadata.getLocal().isIndex())
                    cfs.metadata.getLocal().partitionKeyType.validate(raw);
                key = sstable.decorateKey(raw);
            } catch (Throwable th) {
                throwIfFatal(th);
                // check for null key below
            }
            updateIndexKey();
            long dataStart = dataFile.getFilePointer();
            long dataStartFromIndex = -1;
            long dataSizeFromIndex = -1;
            if (currentIndexKey != null) {
                dataStartFromIndex = currentPartitionPositionFromIndex + 2 + currentIndexKey.remaining();
                dataSizeFromIndex = nextPartitionPositionFromIndex - dataStartFromIndex;
            }
            String keyName = key == null ? "(unreadable key)" : keyString(key);
            outputHandler.debug(String.format("partition %s is %s", keyName, FBUtilities.prettyPrintMemory(dataSizeFromIndex)));
            assert currentIndexKey != null || !indexAvailable();
            try {
                if (key == null)
                    throw new IOError(new IOException("Unable to read partition key from data file"));
                if (currentIndexKey != null && !key.getKey().equals(currentIndexKey)) {
                    throw new IOError(new IOException(String.format("Key from data file (%s) does not match key from index file (%s)",
                                                                    //ByteBufferUtil.bytesToHex(key.getKey()),
                                                                    "_too big_",
                                                                    ByteBufferUtil.bytesToHex(currentIndexKey))));
                }
                if (indexFile != null && dataSizeFromIndex > dataFile.length())
                    throw new IOError(new IOException("Impossible partition size (greater than file length): " + dataSizeFromIndex));
                if (indexFile != null && dataStart != dataStartFromIndex)
                    outputHandler.warn(String.format("Data file partition position %d differs from index file row position %d", dataStart, dataStartFromIndex));
                if (tryAppend(prevKey, key, writer))
                    prevKey = key;
            } catch (Throwable th) {
                throwIfFatal(th);
                outputHandler.warn(String.format("Error reading partition %s (stacktrace follows):", keyName), th);
                if (currentIndexKey != null && (key == null || !key.getKey().equals(currentIndexKey) || dataStart != dataStartFromIndex)) {
                    outputHandler.output(String.format("Retrying from partition index; data is %s bytes starting at %s", dataSizeFromIndex, dataStartFromIndex));
                    key = sstable.decorateKey(currentIndexKey);
                    try {
                        if (!cfs.metadata.getLocal().isIndex())
                            cfs.metadata.getLocal().partitionKeyType.validate(key.getKey());
                        dataFile.seek(dataStartFromIndex);
                        if (tryAppend(prevKey, key, writer))
                            prevKey = key;
                    } catch (Throwable th2) {
                        throwIfFatal(th2);
                        throwIfCannotContinue(key, th2);
                        outputHandler.warn("Retry failed too. Skipping to next partition (retry's stacktrace follows)", th2);
                        badPartitions++;
                        seekToNextPartition();
                    }
                } else {
                    throwIfCannotContinue(key, th);
                    outputHandler.warn("Partition starting at position " + dataStart + " is unreadable; skipping to next");
                    badPartitions++;
                    if (currentIndexKey != null)
                        seekToNextPartition();
                }
            }
        }
        if (!outOfOrder.isEmpty()) {
            // out of order partitions/rows, but no bad partition found - we can keep our repairedAt time
            long repairedAt = badPartitions > 0 ? ActiveRepairService.UNREPAIRED_SSTABLE : sstable.getSSTableMetadata().repairedAt;
            SSTableReader newInOrderSstable;
            try (SSTableWriter inOrderWriter = CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize, repairedAt, metadata.pendingRepair, metadata.isTransient, sstable, transaction)) {
                for (Partition partition : outOfOrder)
                    inOrderWriter.append(partition.unfilteredIterator());
                newInOrderSstable = inOrderWriter.finish(-1, sstable.maxDataAge, true);
            }
            transaction.update(newInOrderSstable, false);
            finished.add(newInOrderSstable);
            outputHandler.warn(String.format("%d out of order partition (or partitions with out of order rows) found while scrubbing %s; " +
                                             "Those have been written (in order) to a new sstable (%s)", outOfOrder.size(), sstable, newInOrderSstable));
        }
        // finish obsoletes the old sstable
        finished.addAll(writer.setRepairedAt(badPartitions > 0 ? ActiveRepairService.UNREPAIRED_SSTABLE : sstable.getSSTableMetadata().repairedAt).finish());
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } finally {
        if (transaction.isOffline())
            finished.forEach(sstable -> sstable.selfRef().release());
    }
    if (!finished.isEmpty()) {
        outputHandler.output("Scrub of " + sstable + " complete: " + goodPartitions + " partitions in new sstable and " + emptyPartitions + " empty (tombstoned) partitions dropped");
        if (negativeLocalDeletionInfoMetrics.fixedRows > 0)
            outputHandler.output("Fixed " + negativeLocalDeletionInfoMetrics.fixedRows + " rows with overflowed local deletion time.");
        if (badPartitions > 0)
            outputHandler.warn("Unable to recover " + badPartitions + " partitions that were skipped. You can attempt manual recovery from the pre-scrub snapshot. You can also run nodetool repair to transfer the data from a healthy replica, if any");
    } else {
        if (badPartitions > 0)
            outputHandler.warn("No valid partitions found while scrubbing " + sstable + "; it is marked for deletion now. If you want to attempt manual recovery, you can find a copy in the pre-scrub snapshot");
        else
            outputHandler.output("Scrub of " + sstable + " complete; looks like all " + emptyPartitions + " partitions were tombstoned");
    }
}
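Here Refs.ref(Collections.singleton(sstable)) pins a single reader for the entire scrub, so compaction cannot release the sstable and delete its files mid-operation; the try-with-resources header drops the pin on every exit path, including exceptions. A generic, self-contained sketch of that pinning idiom (the counter is a hypothetical stand-in for Cassandra's ref counting, not its API):

import java.util.concurrent.atomic.AtomicInteger;

class PinSketch {
    // Hypothetical stand-in for a reader's reference count.
    static final AtomicInteger refCount = new AtomicInteger(1);

    static AutoCloseable pin() {
        refCount.incrementAndGet();       // take a reference before starting the long operation
        return refCount::decrementAndGet; // released automatically when the try block exits
    }

    static void longOperation() throws Exception {
        try (AutoCloseable pinned = pin()) {
            // while pinned is held, the underlying files cannot be reclaimed
        }
    }
}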
Use of org.apache.cassandra.utils.concurrent.Refs in project cassandra by apache.
The class ActiveRepairServiceTest, method testSnapshotAddSSTables.
@Test
public void testSnapshotAddSSTables() throws Exception {
    ColumnFamilyStore store = prepareColumnFamilyStore();
    UUID prsId = UUID.randomUUID();
    Set<SSTableReader> original = Sets.newHashSet(store.select(View.select(SSTableSet.CANONICAL, (s) -> !s.isRepaired())).sstables);
    Collection<Range<Token>> ranges = Collections.singleton(new Range<>(store.getPartitioner().getMinimumToken(), store.getPartitioner().getMinimumToken()));
    ActiveRepairService.instance.registerParentRepairSession(prsId, FBUtilities.getBroadcastAddressAndPort(), Collections.singletonList(store), ranges, true, System.currentTimeMillis(), true, PreviewKind.NONE);
    store.getRepairManager().snapshot(prsId.toString(), ranges, false);
    UUID prsId2 = UUID.randomUUID();
    ActiveRepairService.instance.registerParentRepairSession(prsId2, FBUtilities.getBroadcastAddressAndPort(), Collections.singletonList(store), ranges, true, System.currentTimeMillis(), true, PreviewKind.NONE);
    createSSTables(store, 2);
    store.getRepairManager().snapshot(prsId.toString(), ranges, false);
    try (Refs<SSTableReader> refs = store.getSnapshotSSTableReaders(prsId.toString())) {
        assertEquals(original, Sets.newHashSet(refs.iterator()));
    }
}
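What the test pins down is snapshot isolation: the Refs returned by getSnapshotSSTableReaders cover exactly the readers that were live when the snapshot was taken, so sstables created afterwards are invisible to it, and the try-with-resources releases those refs even if the assertion fails. A toy, self-contained illustration of the isolation property (plain sets stand in for the snapshot machinery):

import java.util.HashSet;
import java.util.Set;

class SnapshotIsolationSketch {
    static void demo() {
        Set<String> live = new HashSet<>(Set.of("sstable-1", "sstable-2"));
        Set<String> snapshot = new HashSet<>(live); // capture the live set at snapshot time
        live.add("sstable-3");                      // written after the snapshot
        assert snapshot.equals(Set.of("sstable-1", "sstable-2")); // later additions are invisible
    }
}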