use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.
the class DefaultCompactionWriter method switchCompactionLocation.
@Override
public void switchCompactionLocation(Directories.DataDirectory directory) {
@SuppressWarnings("resource") SSTableWriter writer = SSTableWriter.create(cfs.newSSTableDescriptor(getDirectories().getLocationForDisk(directory)), estimatedTotalKeys, minRepairedAt, pendingRepair, cfs.metadata, new MetadataCollector(txn.originals(), cfs.metadata().comparator, sstableLevel), SerializationHeader.make(cfs.metadata(), nonExpiredSSTables), cfs.indexManager.listIndexes(), txn);
sstableWriter.switchWriter(writer);
}
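For context, a minimal sketch of how a writer like this is typically driven from a compaction task, assuming a ColumnFamilyStore cfs, a LifecycleTransaction txn over the compacting sstables, their non-expired subset nonExpiredSSTables, and a merged partition iterator mergedPartitions. The constructor and append/finish calls follow CompactionAwareWriter in this code base, but treat the exact signatures as assumptions rather than verbatim API.
// Sketch only: drive a DefaultCompactionWriter over merged compaction input.
// cfs, txn, nonExpiredSSTables and mergedPartitions are assumed to already exist.
try (CompactionAwareWriter writer = new DefaultCompactionWriter(cfs, cfs.getDirectories(), txn, nonExpiredSSTables)) {
    while (mergedPartitions.hasNext()) {
        // The base class switches output location (via switchCompactionLocation, shown above)
        // when the partition key crosses a disk boundary.
        writer.append(mergedPartitions.next());
    }
    // finish() closes the underlying SSTableWriter(s) and returns the new readers.
    List<SSTableReader> newSSTables = writer.finish();
}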
use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.
the class Scrubber method scrub.
public void scrub() {
List<SSTableReader> finished = new ArrayList<>();
boolean completed = false;
outputHandler.output(String.format("Scrubbing %s (%s)", sstable, FBUtilities.prettyPrintMemory(dataFile.length())));
try (SSTableRewriter writer = SSTableRewriter.construct(cfs, transaction, false, sstable.maxDataAge)) {
nextIndexKey = indexAvailable() ? ByteBufferUtil.readWithShortLength(indexFile) : null;
if (indexAvailable()) {
// throw away variable so we don't have a side effect in the assert
long firstRowPositionFromIndex = rowIndexEntrySerializer.deserializePositionAndSkip(indexFile);
assert firstRowPositionFromIndex == 0 : firstRowPositionFromIndex;
}
StatsMetadata metadata = sstable.getSSTableMetadata();
writer.switchWriter(CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize, metadata.repairedAt, metadata.pendingRepair, sstable, transaction));
DecoratedKey prevKey = null;
while (!dataFile.isEOF()) {
if (scrubInfo.isStopRequested())
throw new CompactionInterruptedException(scrubInfo.getCompactionInfo());
long rowStart = dataFile.getFilePointer();
outputHandler.debug("Reading row at " + rowStart);
DecoratedKey key = null;
try {
key = sstable.decorateKey(ByteBufferUtil.readWithShortLength(dataFile));
} catch (Throwable th) {
throwIfFatal(th);
// check for null key below
}
updateIndexKey();
long dataStart = dataFile.getFilePointer();
long dataStartFromIndex = -1;
long dataSizeFromIndex = -1;
if (currentIndexKey != null) {
dataStartFromIndex = currentRowPositionFromIndex + 2 + currentIndexKey.remaining();
dataSizeFromIndex = nextRowPositionFromIndex - dataStartFromIndex;
}
// avoid an NPE if key is null
String keyName = key == null ? "(unreadable key)" : ByteBufferUtil.bytesToHex(key.getKey());
outputHandler.debug(String.format("row %s is %s", keyName, FBUtilities.prettyPrintMemory(dataSizeFromIndex)));
assert currentIndexKey != null || !indexAvailable();
try {
if (key == null)
throw new IOError(new IOException("Unable to read row key from data file"));
if (currentIndexKey != null && !key.getKey().equals(currentIndexKey)) {
throw new IOError(new IOException(String.format("Key from data file (%s) does not match key from index file (%s)", //ByteBufferUtil.bytesToHex(key.getKey()), ByteBufferUtil.bytesToHex(currentIndexKey))));
"_too big_", ByteBufferUtil.bytesToHex(currentIndexKey))));
}
if (indexFile != null && dataSizeFromIndex > dataFile.length())
throw new IOError(new IOException("Impossible row size (greater than file length): " + dataSizeFromIndex));
if (indexFile != null && dataStart != dataStartFromIndex)
outputHandler.warn(String.format("Data file row position %d differs from index file row position %d", dataStart, dataStartFromIndex));
if (tryAppend(prevKey, key, writer))
prevKey = key;
} catch (Throwable th) {
throwIfFatal(th);
outputHandler.warn("Error reading row (stacktrace follows):", th);
if (currentIndexKey != null && (key == null || !key.getKey().equals(currentIndexKey) || dataStart != dataStartFromIndex)) {
outputHandler.output(String.format("Retrying from row index; data is %s bytes starting at %s", dataSizeFromIndex, dataStartFromIndex));
key = sstable.decorateKey(currentIndexKey);
try {
dataFile.seek(dataStartFromIndex);
if (tryAppend(prevKey, key, writer))
prevKey = key;
} catch (Throwable th2) {
throwIfFatal(th2);
throwIfCannotContinue(key, th2);
outputHandler.warn("Retry failed too. Skipping to next row (retry's stacktrace follows)", th2);
badRows++;
seekToNextRow();
}
} else {
throwIfCannotContinue(key, th);
outputHandler.warn("Row starting at position " + dataStart + " is unreadable; skipping to next");
badRows++;
if (currentIndexKey != null)
seekToNextRow();
}
}
}
if (!outOfOrder.isEmpty()) {
// out of order rows, but no bad rows found - we can keep our repairedAt time
long repairedAt = badRows > 0 ? ActiveRepairService.UNREPAIRED_SSTABLE : metadata.repairedAt;
SSTableReader newInOrderSstable;
try (SSTableWriter inOrderWriter = CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize, repairedAt, metadata.pendingRepair, sstable, transaction)) {
for (Partition partition : outOfOrder) inOrderWriter.append(partition.unfilteredIterator());
newInOrderSstable = inOrderWriter.finish(-1, sstable.maxDataAge, true);
}
transaction.update(newInOrderSstable, false);
finished.add(newInOrderSstable);
outputHandler.warn(String.format("%d out of order rows found while scrubbing %s; Those have been written (in order) to a new sstable (%s)", outOfOrder.size(), sstable, newInOrderSstable));
}
// finish obsoletes the old sstable
finished.addAll(writer.setRepairedAt(badRows > 0 ? ActiveRepairService.UNREPAIRED_SSTABLE : sstable.getSSTableMetadata().repairedAt).finish());
completed = true;
} catch (IOException e) {
throw Throwables.propagate(e);
} finally {
if (transaction.isOffline())
finished.forEach(sstable -> sstable.selfRef().release());
}
if (completed) {
outputHandler.output("Scrub of " + sstable + " complete: " + goodRows + " rows in new sstable and " + emptyRows + " empty (tombstoned) rows dropped");
if (badRows > 0)
outputHandler.warn("Unable to recover " + badRows + " rows that were skipped. You can attempt manual recovery from the pre-scrub snapshot. You can also run nodetool repair to transfer the data from a healthy replica, if any");
} else {
if (badRows > 0)
outputHandler.warn("No valid rows found while scrubbing " + sstable + "; it is marked for deletion now. If you want to attempt manual recovery, you can find a copy in the pre-scrub snapshot");
else
outputHandler.output("Scrub of " + sstable + " complete; looks like all " + emptyRows + " rows were tombstoned");
}
}
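Driving the scrubber directly follows the same transactional pattern as the compaction writers above. A minimal sketch, loosely modeled on the offline scrub path, assuming cfs and the target sstable are in hand; the LifecycleTransaction.offline overload and the Scrubber constructor flags used here are assumptions, not code taken from this page.
// Sketch: scrub a single sstable offline.
// skipCorrupted controls whether unreadable rows are skipped (as in the catch blocks above)
// or abort the scrub; checkData asks scrub to validate row data as well as structure.
boolean skipCorrupted = true;
boolean checkData = true;
try (LifecycleTransaction txn = LifecycleTransaction.offline(OperationType.SCRUB, sstable);
     Scrubber scrubber = new Scrubber(cfs, txn, skipCorrupted, checkData)) {
    scrubber.scrub(); // rewrites readable rows into a new sstable, skipping or quarantining the rest
}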
use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.
the class SplittingSizeTieredCompactionWriter method switchCompactionLocation.
@Override
public void switchCompactionLocation(Directories.DataDirectory location) {
this.location = location;
long currentPartitionsToWrite = Math.round(ratios[currentRatioIndex] * estimatedTotalKeys);
@SuppressWarnings("resource") SSTableWriter writer = SSTableWriter.create(cfs.newSSTableDescriptor(getDirectories().getLocationForDisk(location)), currentPartitionsToWrite, minRepairedAt, pendingRepair, cfs.metadata, new MetadataCollector(allSSTables, cfs.metadata().comparator, 0), SerializationHeader.make(cfs.metadata(), nonExpiredSSTables), cfs.indexManager.listIndexes(), txn);
logger.trace("Switching writer, currentPartitionsToWrite = {}", currentPartitionsToWrite);
sstableWriter.switchWriter(writer);
}
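The ratios array used above is built by the class's constructor so that each successive output sstable gets roughly half the share of the previous one (50%, 25%, 12.5%, ...) until a configured smallest-sstable size is reached. A small, self-contained illustration of how the per-writer partition budget falls out of that scheme; the concrete ratio values and key count below are made up for the example.
// Illustration only: halving ratios and the resulting per-writer partition budgets.
public class SplitRatioExample {
    public static void main(String[] args) {
        long estimatedTotalKeys = 1_000_000;
        double[] ratios = { 0.5, 0.25, 0.125, 0.125 }; // example split; real values depend on sstable sizes
        for (int i = 0; i < ratios.length; i++) {
            long partitionsForWriter = Math.round(ratios[i] * estimatedTotalKeys);
            System.out.printf("writer %d gets ~%d partitions%n", i, partitionsForWriter);
        }
    }
}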
use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.
the class SSTableRewriter method doPrepare.
protected void doPrepare() {
switchWriter(null);
if (throwEarly)
throw new RuntimeException("exception thrown early in finish, for testing");
// No early open to finalize and replace
for (SSTableWriter writer : writers) {
assert writer.getFilePointer() > 0;
writer.setRepairedAt(repairedAt).setOpenResult(true).prepareToCommit();
SSTableReader reader = writer.finished();
transaction.update(reader, false);
preparedForCommit.add(reader);
}
transaction.checkpoint();
if (throwLate)
throw new RuntimeException("exception thrown after all sstables finished, for testing");
if (!keepOriginals)
transaction.obsoleteOriginals();
transaction.prepareToCommit();
}
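doPrepare() is the commit-preparation half of the rewriter lifecycle that Scrubber exercises above. A condensed sketch of that lifecycle from the caller's side, assuming cfs, txn, and a source sstable; createWriterFor(...) is an illustrative helper for building a fresh SSTableWriter, not an API shown on this page.
// Sketch: canonical SSTableRewriter usage — construct, switchWriter, append, finish.
try (SSTableRewriter rewriter = SSTableRewriter.construct(cfs, txn, false, sstable.maxDataAge);
     ISSTableScanner scanner = sstable.getScanner()) {
    rewriter.switchWriter(createWriterFor(cfs, txn)); // createWriterFor is an illustrative helper
    while (scanner.hasNext())
        rewriter.append(scanner.next());              // rows flow through the current writer
    // finish() drives prepareToCommit()/commit(): doPrepare() above finalizes each writer,
    // registers the new readers with the transaction, and obsoletes the originals unless
    // keepOriginals was requested.
    List<SSTableReader> rewritten = rewriter.finish();
}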
use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.
the class SSTableWriterTest method testAbortTxnWithClosedWriterShouldRemoveSSTable.
@Test
public void testAbortTxnWithClosedWriterShouldRemoveSSTable() throws InterruptedException {
Keyspace keyspace = Keyspace.open(KEYSPACE);
ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(CF);
truncate(cfs);
File dir = cfs.getDirectories().getDirectoryForNewSSTables();
LifecycleTransaction txn = LifecycleTransaction.offline(OperationType.STREAM);
try (SSTableWriter writer = getWriter(cfs, dir, txn)) {
for (int i = 0; i < 10000; i++) {
UpdateBuilder builder = UpdateBuilder.create(cfs.metadata(), random(i, 10)).withTimestamp(1);
for (int j = 0; j < 100; j++) builder.newRow("" + j).add("val", ByteBuffer.allocate(1000));
writer.append(builder.build().unfilteredIterator());
}
assertFileCounts(dir.list());
for (int i = 10000; i < 20000; i++) {
UpdateBuilder builder = UpdateBuilder.create(cfs.metadata(), random(i, 10)).withTimestamp(1);
for (int j = 0; j < 100; j++) builder.newRow("" + j).add("val", ByteBuffer.allocate(1000));
writer.append(builder.build().unfilteredIterator());
}
SSTableReader sstable = writer.finish(true);
int datafiles = assertFileCounts(dir.list());
assertEquals(datafiles, 1);
sstable.selfRef().release();
// These checks don't work on Windows because the writer has the channel still
// open till .abort() is called (via the builder)
if (!FBUtilities.isWindows) {
LifecycleTransaction.waitForDeletions();
assertFileCounts(dir.list());
}
txn.abort();
LifecycleTransaction.waitForDeletions();
datafiles = assertFileCounts(dir.list());
assertEquals(datafiles, 0);
validateCFS(cfs);
}
}
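The test above exercises the abort path: closing the writer and aborting the transaction removes the finished sstable. For contrast, a sketch of the commit path under the same fixtures (KEYSPACE/CF, getWriter, random, assertFileCounts from the test base class); the txn.update(...)/txn.finish() sequence mirrors what SSTableRewriter.doPrepare() does above, but treat this flow as an assumption rather than a verbatim test from the suite.
// Sketch: committing the transaction keeps the finished sstable on disk.
LifecycleTransaction txn = LifecycleTransaction.offline(OperationType.STREAM);
File dir = cfs.getDirectories().getDirectoryForNewSSTables();
try (SSTableWriter writer = getWriter(cfs, dir, txn)) {
    UpdateBuilder builder = UpdateBuilder.create(cfs.metadata(), random(0, 10)).withTimestamp(1);
    for (int j = 0; j < 100; j++)
        builder.newRow("" + j).add("val", ByteBuffer.allocate(1000));
    writer.append(builder.build().unfilteredIterator());
    SSTableReader sstable = writer.finish(true); // finalize and open the new reader
    txn.update(sstable, false);                  // hand the new sstable to the transaction
    txn.finish();                                // commit: data files are kept, not deleted
    assertEquals(1, assertFileCounts(dir.list()));
    sstable.selfRef().release();
}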