Use of org.apache.cassandra.io.util.File in project cassandra by apache.
The class Record, method record.
public static void record(String saveToDir, long seed, boolean withRng, boolean withRngCallSites, ClusterSimulation.Builder<?> builder) {
File eventFile = new File(new File(saveToDir), Long.toHexString(seed) + ".gz");
File rngFile = new File(new File(saveToDir), Long.toHexString(seed) + ".rng.gz");
{
Set<String> modifiers = new LinkedHashSet<>();
if (withRngCallSites)
modifiers.add("rngCallSites");
else if (withRng)
modifiers.add("rng");
if (builder.capture().waitSites)
modifiers.add("WaitSites");
if (builder.capture().wakeSites)
modifiers.add("WakeSites");
logger.error("Seed 0x{} ({}) (With: {})", Long.toHexString(seed), eventFile, modifiers);
}
try (PrintWriter eventOut = new PrintWriter(new GZIPOutputStream(eventFile.newOutputStream(OVERWRITE), 1 << 16));
DataOutputStreamPlus rngOut = new BufferedDataOutputStreamPlus(Channels.newChannel(withRng ? new GZIPOutputStream(rngFile.newOutputStream(OVERWRITE), 1 << 16) : new ByteArrayOutputStream(0)))) {
eventOut.println("modifiers:" + (withRng ? "rng," : "") + (withRngCallSites ? "rngCallSites," : "") + (builder.capture().waitSites ? "waitSites," : "") + (builder.capture().wakeSites ? "wakeSites," : ""));
RandomSourceRecorder random;
if (withRng) {
random = new RandomSourceRecorder(rngOut, new RandomSource.Default(), withRngCallSites);
builder.random(random);
} else {
random = null;
}
// periodic forced flush to ensure state is on disk after some kind of stall
Thread flusher = new Thread(() -> {
try {
while (true) {
Thread.sleep(1000);
eventOut.flush();
if (random != null) {
synchronized (random) {
rngOut.flush();
}
}
}
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException ignore) {
} finally {
eventOut.flush();
try {
if (random != null) {
synchronized (random) {
rngOut.flush();
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
}, "Flush Recordings of " + seed);
flusher.setDaemon(true);
flusher.start();
try (ClusterSimulation<?> cluster = builder.create(seed)) {
try (CloseableIterator<?> iter = cluster.simulation.iterator()) {
while (iter.hasNext()) eventOut.println(normaliseRecordingOut(iter.next().toString()));
if (random != null)
random.close();
} finally {
eventOut.flush();
rngOut.flush();
}
} finally {
flusher.interrupt();
}
} catch (Throwable t) {
t.printStackTrace();
throw new RuntimeException("Failed on seed " + Long.toHexString(seed), t);
}
}
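The only org.apache.cassandra.io.util.File calls this example leans on are the parent-directory-plus-child-name constructor and newOutputStream(OVERWRITE). Below is a minimal, self-contained sketch of that pattern; the directory path, file name, and the static import of OVERWRITE (assumed here to be File.WriteMode.OVERWRITE, matching the snippet above) are illustrative assumptions rather than part of the original code.

import java.io.IOException;
import java.io.PrintWriter;
import java.util.zip.GZIPOutputStream;

import org.apache.cassandra.io.util.File;

import static org.apache.cassandra.io.util.File.WriteMode.OVERWRITE; // assumed location of OVERWRITE

public class RecordingFileSketch {
    public static void main(String[] args) throws IOException {
        long seed = 0xdeadbeefL; // hypothetical seed
        // Same construction as record(): wrap the target directory in a File, then derive the child name
        File eventFile = new File(new File("/tmp/simulations"), Long.toHexString(seed) + ".gz");
        // Overwrite any previous recording; buffer the gzip stream with a 64 KiB buffer, as above
        try (PrintWriter out = new PrintWriter(new GZIPOutputStream(eventFile.newOutputStream(OVERWRITE), 1 << 16))) {
            out.println("modifiers:rng,");
        }
    }
}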
Use of org.apache.cassandra.io.util.File in project cassandra by apache.
The class CompactionManager, method antiCompactGroup.
@VisibleForTesting
int antiCompactGroup(ColumnFamilyStore cfs, RangesAtEndpoint ranges, LifecycleTransaction txn, UUID pendingRepair, BooleanSupplier isCancelled) {
Preconditions.checkArgument(!ranges.isEmpty(), "need at least one full or transient range");
long groupMaxDataAge = -1;
for (SSTableReader sstable : txn.originals()) {
if (groupMaxDataAge < sstable.maxDataAge)
groupMaxDataAge = sstable.maxDataAge;
}
if (txn.originals().size() == 0) {
logger.info("No valid anticompactions for this group, All sstables were compacted and are no longer available");
return 0;
}
logger.info("Anticompacting {} in {}.{} for {}", txn.originals(), cfs.keyspace.getName(), cfs.getTableName(), pendingRepair);
Set<SSTableReader> sstableAsSet = txn.originals();
File destination = cfs.getDirectories().getWriteableLocationAsFile(cfs.getExpectedCompactedFileSize(sstableAsSet, OperationType.ANTICOMPACTION));
int nowInSec = FBUtilities.nowInSeconds();
RateLimiter limiter = getRateLimiter();
/**
* HACK WARNING
*
* We have multiple writers operating over the same Transaction, producing different sets of sstables that all
* logically replace the transaction's originals. The SSTableRewriter assumes it has exclusive control over
* the transaction state, and this will lead to temporarily inconsistent sstable/tracker state if we do not
* take special measures to avoid it.
*
* Specifically, if a number of rewriters have prepareToCommit() invoked in sequence, then two problematic things happen:
* 1. The obsoleteOriginals() call of the first rewriter immediately removes the originals from the tracker, despite
* their having been only partially replaced. To avoid this, we must either avoid obsoleteOriginals() or checkpoint()
* 2. The LifecycleTransaction may only have prepareToCommit() invoked once, and this will checkpoint() also.
*
* Similarly commit() would finalise partially complete on-disk state.
*
* To avoid these problems, we introduce a SharedTxn that proxies all calls onto the underlying transaction
* except prepareToCommit(), checkpoint(), obsoleteOriginals(), and commit().
* We then invoke these methods directly once each of the rewriters has updated the transaction
* with its share of replacements.
*
* Note that for the same essential reason we also explicitly disable early open.
* By noop-ing checkpoint we avoid any of the problems with early open, but by continuing to explicitly
* disable it we also prevent any of the extra associated work from being performed.
*/
class SharedTxn extends WrappedLifecycleTransaction {
public SharedTxn(ILifecycleTransaction delegate) {
super(delegate);
}
public Throwable commit(Throwable accumulate) {
return accumulate;
}
public void prepareToCommit() {
}
public void checkpoint() {
}
public void obsoleteOriginals() {
}
public void close() {
}
}
CompactionStrategyManager strategy = cfs.getCompactionStrategyManager();
try (SharedTxn sharedTxn = new SharedTxn(txn);
SSTableRewriter fullWriter = SSTableRewriter.constructWithoutEarlyOpening(sharedTxn, false, groupMaxDataAge);
SSTableRewriter transWriter = SSTableRewriter.constructWithoutEarlyOpening(sharedTxn, false, groupMaxDataAge);
SSTableRewriter unrepairedWriter = SSTableRewriter.constructWithoutEarlyOpening(sharedTxn, false, groupMaxDataAge);
AbstractCompactionStrategy.ScannerList scanners = strategy.getScanners(txn.originals());
CompactionController controller = new CompactionController(cfs, sstableAsSet, getDefaultGcBefore(cfs, nowInSec));
CompactionIterator ci = getAntiCompactionIterator(scanners.scanners, controller, nowInSec, UUIDGen.getTimeUUID(), active, isCancelled)) {
int expectedBloomFilterSize = Math.max(cfs.metadata().params.minIndexInterval, (int) (SSTableReader.getApproximateKeyCount(sstableAsSet)));
fullWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, UNREPAIRED_SSTABLE, pendingRepair, false, sstableAsSet, txn));
transWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, UNREPAIRED_SSTABLE, pendingRepair, true, sstableAsSet, txn));
unrepairedWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, UNREPAIRED_SSTABLE, NO_PENDING_REPAIR, false, sstableAsSet, txn));
Predicate<Token> fullChecker = !ranges.onlyFull().isEmpty() ? new Range.OrderedRangeContainmentChecker(ranges.onlyFull().ranges()) : t -> false;
Predicate<Token> transChecker = !ranges.onlyTransient().isEmpty() ? new Range.OrderedRangeContainmentChecker(ranges.onlyTransient().ranges()) : t -> false;
double compressionRatio = scanners.getCompressionRatio();
if (compressionRatio == MetadataCollector.NO_COMPRESSION_RATIO)
compressionRatio = 1.0;
long lastBytesScanned = 0;
while (ci.hasNext()) {
try (UnfilteredRowIterator partition = ci.next()) {
Token token = partition.partitionKey().getToken();
// if this row is contained in the full or transient ranges, append it to the appropriate sstable
if (fullChecker.test(token)) {
fullWriter.append(partition);
} else if (transChecker.test(token)) {
transWriter.append(partition);
} else {
// otherwise, append it to the unrepaired sstable
unrepairedWriter.append(partition);
}
long bytesScanned = scanners.getTotalBytesScanned();
compactionRateLimiterAcquire(limiter, bytesScanned, lastBytesScanned, compressionRatio);
lastBytesScanned = bytesScanned;
}
}
fullWriter.prepareToCommit();
transWriter.prepareToCommit();
unrepairedWriter.prepareToCommit();
txn.checkpoint();
txn.obsoleteOriginals();
txn.prepareToCommit();
List<SSTableReader> fullSSTables = new ArrayList<>(fullWriter.finished());
List<SSTableReader> transSSTables = new ArrayList<>(transWriter.finished());
List<SSTableReader> unrepairedSSTables = new ArrayList<>(unrepairedWriter.finished());
fullWriter.commit();
transWriter.commit();
unrepairedWriter.commit();
txn.commit();
logger.info("Anticompacted {} in {}.{} to full = {}, transient = {}, unrepaired = {} for {}", sstableAsSet, cfs.keyspace.getName(), cfs.getTableName(), fullSSTables, transSSTables, unrepairedSSTables, pendingRepair);
return fullSSTables.size() + transSSTables.size() + unrepairedSSTables.size();
} catch (Throwable e) {
if (e instanceof CompactionInterruptedException && isCancelled.getAsBoolean()) {
logger.info("Anticompaction has been canceled for session {}", pendingRepair);
logger.trace(e.getMessage(), e);
} else {
JVMStabilityInspector.inspectThrowable(e);
logger.error("Error anticompacting " + txn + " for " + pendingRepair, e);
}
throw e;
}
}
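Distilled from the HACK WARNING above: hand every rewriter a proxy that swallows the finalizing calls, then perform the real prepareToCommit()/commit() exactly once after all writers have registered their replacements. The sketch below is a purely illustrative reduction of that idea with hypothetical interfaces, not Cassandra's lifecycle API.

import java.util.List;
import java.util.function.Consumer;

// Hypothetical miniature of a lifecycle transaction: one proxied call, two finalizing calls.
interface MiniTxn {
    void update(String replacementSSTable); // non-finalizing: proxied to the real transaction
    void prepareToCommit();                 // finalizing: must run exactly once overall
    void commit();                          // finalizing: must run exactly once overall
}

// Counterpart of SharedTxn above: delegates updates, no-ops finalization.
class NoOpFinalizeTxn implements MiniTxn {
    private final MiniTxn delegate;
    NoOpFinalizeTxn(MiniTxn delegate) { this.delegate = delegate; }
    @Override public void update(String replacementSSTable) { delegate.update(replacementSSTable); }
    @Override public void prepareToCommit() { /* swallowed; coordinator finalizes once */ }
    @Override public void commit() { /* swallowed */ }
}

class SharedTxnDemo {
    static void runWriters(MiniTxn real, List<Consumer<MiniTxn>> writers) {
        MiniTxn shared = new NoOpFinalizeTxn(real);
        // Each writer updates (and may try to finalize) the transaction through the proxy.
        for (Consumer<MiniTxn> writer : writers)
            writer.accept(shared);
        // Single explicit finalization, mirroring txn.prepareToCommit()/txn.commit() above.
        real.prepareToCommit();
        real.commit();
    }
}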
Use of org.apache.cassandra.io.util.File in project cassandra by apache.
The class CompactionManager, method forceUserDefinedCompaction.
public void forceUserDefinedCompaction(String dataFiles) {
String[] filenames = dataFiles.split(",");
Multimap<ColumnFamilyStore, Descriptor> descriptors = ArrayListMultimap.create();
for (String filename : filenames) {
// extract keyspace and columnfamily name from filename
Descriptor desc = Descriptor.fromFilename(filename.trim());
if (Schema.instance.getTableMetadataRef(desc) == null) {
logger.warn("Schema does not exist for file {}. Skipping.", filename);
continue;
}
// group by keyspace/columnfamily
ColumnFamilyStore cfs = Keyspace.open(desc.ksname).getColumnFamilyStore(desc.cfname);
descriptors.put(cfs, cfs.getDirectories().find(new File(filename.trim()).name()));
}
List<Future<?>> futures = new ArrayList<>(descriptors.size());
int nowInSec = FBUtilities.nowInSeconds();
for (ColumnFamilyStore cfs : descriptors.keySet()) futures.add(submitUserDefined(cfs, descriptors.get(cfs), getDefaultGcBefore(cfs, nowInSec)));
FBUtilities.waitOnFutures(futures);
}
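As a usage note, forceUserDefinedCompaction takes one comma-separated string of -Data.db paths and is reached through the CompactionManager singleton (typically over JMX on a running node). A hedged sketch follows; the keyspace, table, and file names are hypothetical.

import org.apache.cassandra.db.compaction.CompactionManager;

public class UserDefinedCompactionSketch {
    public static void main(String[] args) {
        // Hypothetical -Data.db paths; each is parsed with Descriptor.fromFilename and
        // grouped by ColumnFamilyStore inside forceUserDefinedCompaction.
        String dataFiles = "/var/lib/cassandra/data/ks1/tbl1-1234abcd/nb-1-big-Data.db,"
                         + "/var/lib/cassandra/data/ks1/tbl1-1234abcd/nb-2-big-Data.db";
        // Only meaningful inside a running Cassandra node; shown here for illustration.
        CompactionManager.instance.forceUserDefinedCompaction(dataFiles);
    }
}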
Use of org.apache.cassandra.io.util.File in project cassandra by apache.
The class Scrubber, method scrub.
public void scrub() {
List<SSTableReader> finished = new ArrayList<>();
outputHandler.output(String.format("Scrubbing %s (%s)", sstable, FBUtilities.prettyPrintMemory(dataFile.length())));
try (SSTableRewriter writer = SSTableRewriter.construct(cfs, transaction, false, sstable.maxDataAge);
Refs<SSTableReader> refs = Refs.ref(Collections.singleton(sstable))) {
nextIndexKey = indexAvailable() ? ByteBufferUtil.readWithShortLength(indexFile) : null;
if (indexAvailable()) {
// throw away variable so we don't have a side effect in the assert
long firstRowPositionFromIndex = rowIndexEntrySerializer.deserializePositionAndSkip(indexFile);
assert firstRowPositionFromIndex == 0 : firstRowPositionFromIndex;
}
StatsMetadata metadata = sstable.getSSTableMetadata();
writer.switchWriter(CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize, metadata.repairedAt, metadata.pendingRepair, metadata.isTransient, sstable, transaction));
DecoratedKey prevKey = null;
while (!dataFile.isEOF()) {
if (scrubInfo.isStopRequested())
throw new CompactionInterruptedException(scrubInfo.getCompactionInfo());
long partitionStart = dataFile.getFilePointer();
outputHandler.debug("Reading row at " + partitionStart);
DecoratedKey key = null;
try {
ByteBuffer raw = ByteBufferUtil.readWithShortLength(dataFile);
if (!cfs.metadata.getLocal().isIndex())
cfs.metadata.getLocal().partitionKeyType.validate(raw);
key = sstable.decorateKey(raw);
} catch (Throwable th) {
throwIfFatal(th);
// check for null key below
}
updateIndexKey();
long dataStart = dataFile.getFilePointer();
long dataStartFromIndex = -1;
long dataSizeFromIndex = -1;
if (currentIndexKey != null) {
dataStartFromIndex = currentPartitionPositionFromIndex + 2 + currentIndexKey.remaining();
dataSizeFromIndex = nextPartitionPositionFromIndex - dataStartFromIndex;
}
String keyName = key == null ? "(unreadable key)" : keyString(key);
outputHandler.debug(String.format("partition %s is %s", keyName, FBUtilities.prettyPrintMemory(dataSizeFromIndex)));
assert currentIndexKey != null || !indexAvailable();
try {
if (key == null)
throw new IOError(new IOException("Unable to read partition key from data file"));
if (currentIndexKey != null && !key.getKey().equals(currentIndexKey)) {
throw new IOError(new IOException(String.format("Key from data file (%s) does not match key from index file (%s)", // ByteBufferUtil.bytesToHex(key.getKey()), ByteBufferUtil.bytesToHex(currentIndexKey))));
"_too big_", ByteBufferUtil.bytesToHex(currentIndexKey))));
}
if (indexFile != null && dataSizeFromIndex > dataFile.length())
throw new IOError(new IOException("Impossible partition size (greater than file length): " + dataSizeFromIndex));
if (indexFile != null && dataStart != dataStartFromIndex)
outputHandler.warn(String.format("Data file partition position %d differs from index file row position %d", dataStart, dataStartFromIndex));
if (tryAppend(prevKey, key, writer))
prevKey = key;
} catch (Throwable th) {
throwIfFatal(th);
outputHandler.warn(String.format("Error reading partition %s (stacktrace follows):", keyName), th);
if (currentIndexKey != null && (key == null || !key.getKey().equals(currentIndexKey) || dataStart != dataStartFromIndex)) {
outputHandler.output(String.format("Retrying from partition index; data is %s bytes starting at %s", dataSizeFromIndex, dataStartFromIndex));
key = sstable.decorateKey(currentIndexKey);
try {
if (!cfs.metadata.getLocal().isIndex())
cfs.metadata.getLocal().partitionKeyType.validate(key.getKey());
dataFile.seek(dataStartFromIndex);
if (tryAppend(prevKey, key, writer))
prevKey = key;
} catch (Throwable th2) {
throwIfFatal(th2);
throwIfCannotContinue(key, th2);
outputHandler.warn("Retry failed too. Skipping to next partition (retry's stacktrace follows)", th2);
badPartitions++;
seekToNextPartition();
}
} else {
throwIfCannotContinue(key, th);
outputHandler.warn("Partition starting at position " + dataStart + " is unreadable; skipping to next");
badPartitions++;
if (currentIndexKey != null)
seekToNextPartition();
}
}
}
if (!outOfOrder.isEmpty()) {
// out of order partitions/rows, but no bad partition found - we can keep our repairedAt time
long repairedAt = badPartitions > 0 ? ActiveRepairService.UNREPAIRED_SSTABLE : sstable.getSSTableMetadata().repairedAt;
SSTableReader newInOrderSstable;
try (SSTableWriter inOrderWriter = CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize, repairedAt, metadata.pendingRepair, metadata.isTransient, sstable, transaction)) {
for (Partition partition : outOfOrder) inOrderWriter.append(partition.unfilteredIterator());
newInOrderSstable = inOrderWriter.finish(-1, sstable.maxDataAge, true);
}
transaction.update(newInOrderSstable, false);
finished.add(newInOrderSstable);
outputHandler.warn(String.format("%d out of order partition (or partitions with out of order rows) found while scrubbing %s; " + "Those have been written (in order) to a new sstable (%s)", outOfOrder.size(), sstable, newInOrderSstable));
}
// finish obsoletes the old sstable
finished.addAll(writer.setRepairedAt(badPartitions > 0 ? ActiveRepairService.UNREPAIRED_SSTABLE : sstable.getSSTableMetadata().repairedAt).finish());
} catch (IOException e) {
throw Throwables.propagate(e);
} finally {
if (transaction.isOffline())
finished.forEach(sstable -> sstable.selfRef().release());
}
if (!finished.isEmpty()) {
outputHandler.output("Scrub of " + sstable + " complete: " + goodPartitions + " partitions in new sstable and " + emptyPartitions + " empty (tombstoned) partitions dropped");
if (negativeLocalDeletionInfoMetrics.fixedRows > 0)
outputHandler.output("Fixed " + negativeLocalDeletionInfoMetrics.fixedRows + " rows with overflowed local deletion time.");
if (badPartitions > 0)
outputHandler.warn("Unable to recover " + badPartitions + " partitions that were skipped. You can attempt manual recovery from the pre-scrub snapshot. You can also run nodetool repair to transfer the data from a healthy replica, if any");
} else {
if (badPartitions > 0)
outputHandler.warn("No valid partitions found while scrubbing " + sstable + "; it is marked for deletion now. If you want to attempt manual recovery, you can find a copy in the pre-scrub snapshot");
else
outputHandler.output("Scrub of " + sstable + " complete; looks like all " + emptyPartitions + " partitions were tombstoned");
}
}
Use of org.apache.cassandra.io.util.File in project cassandra by apache.
The class LogReplicaSet, method addReplica.
void addReplica(File file) {
File directory = file.parent();
assert !replicasByFile.containsKey(directory);
try {
replicasByFile.put(directory, LogReplica.open(file));
} catch (FSError e) {
logger.error("Failed to open log replica {}", file, e);
FileUtils.handleFSErrorAndPropagate(e);
}
logger.trace("Added log file replica {} ", file);
}
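The File-specific call here is parent(), which keys the replica map by the directory holding the transaction log. A small hedged sketch of that keying; the paths and the map's value type are hypothetical stand-ins for LogReplica.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.cassandra.io.util.File;

public class ReplicaDirectorySketch {
    public static void main(String[] args) {
        // Hypothetical transaction-log replicas living in two data directories
        File replica1 = new File("/data1/ks/tbl-0001/nb_txn_compaction_0001.log");
        File replica2 = new File("/data2/ks/tbl-0001/nb_txn_compaction_0001.log");

        // As in LogReplicaSet.addReplica: at most one replica per parent directory
        Map<File, String> replicasByDirectory = new LinkedHashMap<>();
        replicasByDirectory.put(replica1.parent(), replica1.name());
        replicasByDirectory.put(replica2.parent(), replica2.name());

        System.out.println(replicasByDirectory);
    }
}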