
Example 1 with SSTableWriter

use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.

the class DefaultCompactionWriter method switchCompactionLocation.

@Override
public void switchCompactionLocation(Directories.DataDirectory directory) {
    @SuppressWarnings("resource")
    SSTableWriter writer = SSTableWriter.create(cfs.newSSTableDescriptor(getDirectories().getLocationForDisk(directory)), // descriptor: where the new sstable's files will live
                                                estimatedTotalKeys,                                                       // estimated partition count for the output
                                                minRepairedAt,
                                                pendingRepair,
                                                cfs.metadata,
                                                new MetadataCollector(txn.originals(), cfs.metadata().comparator, sstableLevel), // stats collector seeded from the source sstables
                                                SerializationHeader.make(cfs.metadata(), nonExpiredSSTables),
                                                cfs.indexManager.listIndexes(),
                                                txn);                                                                     // lifecycle transaction that tracks the new sstable
    sstableWriter.switchWriter(writer);
}
Also used : SSTableWriter(org.apache.cassandra.io.sstable.format.SSTableWriter) MetadataCollector(org.apache.cassandra.io.sstable.metadata.MetadataCollector)
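
The @SuppressWarnings("resource") above is deliberate: the method never closes the writer it creates. Ownership passes to the wrapped rewriter via switchWriter, and that rewriter closes every switched writer when the compaction ends. A rough sketch of the hand-off, for illustration only (sstableWriter is the rewriter field used above; createWriterForDisk is a hypothetical stand-in for the SSTableWriter.create(...) call):

// Illustration of the ownership hand-off; not project code.
SSTableWriter writer = createWriterForDisk(directory);     // hypothetical helper standing in for SSTableWriter.create(...)
sstableWriter.switchWriter(writer);                         // the rewriter now owns `writer` and will close it
// ... partitions are appended through the rewriter while this location is active ...
List<SSTableReader> newSSTables = sstableWriter.finish();   // later: closes all switched writers and opens the results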

Example 2 with SSTableWriter

use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.

the class Scrubber method scrub.

public void scrub() {
    List<SSTableReader> finished = new ArrayList<>();
    boolean completed = false;
    outputHandler.output(String.format("Scrubbing %s (%s)", sstable, FBUtilities.prettyPrintMemory(dataFile.length())));
    try (SSTableRewriter writer = SSTableRewriter.construct(cfs, transaction, false, sstable.maxDataAge)) {
        nextIndexKey = indexAvailable() ? ByteBufferUtil.readWithShortLength(indexFile) : null;
        if (indexAvailable()) {
            // throw away variable so we don't have a side effect in the assert
            long firstRowPositionFromIndex = rowIndexEntrySerializer.deserializePositionAndSkip(indexFile);
            assert firstRowPositionFromIndex == 0 : firstRowPositionFromIndex;
        }
        StatsMetadata metadata = sstable.getSSTableMetadata();
        writer.switchWriter(CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize, metadata.repairedAt, metadata.pendingRepair, sstable, transaction));
        DecoratedKey prevKey = null;
        while (!dataFile.isEOF()) {
            if (scrubInfo.isStopRequested())
                throw new CompactionInterruptedException(scrubInfo.getCompactionInfo());
            long rowStart = dataFile.getFilePointer();
            outputHandler.debug("Reading row at " + rowStart);
            DecoratedKey key = null;
            try {
                key = sstable.decorateKey(ByteBufferUtil.readWithShortLength(dataFile));
            } catch (Throwable th) {
                throwIfFatal(th);
            // check for null key below
            }
            updateIndexKey();
            long dataStart = dataFile.getFilePointer();
            long dataStartFromIndex = -1;
            long dataSizeFromIndex = -1;
            if (currentIndexKey != null) {
                dataStartFromIndex = currentRowPositionFromIndex + 2 + currentIndexKey.remaining();
                dataSizeFromIndex = nextRowPositionFromIndex - dataStartFromIndex;
            }
            // avoid an NPE if key is null
            String keyName = key == null ? "(unreadable key)" : ByteBufferUtil.bytesToHex(key.getKey());
            outputHandler.debug(String.format("row %s is %s", keyName, FBUtilities.prettyPrintMemory(dataSizeFromIndex)));
            assert currentIndexKey != null || !indexAvailable();
            try {
                if (key == null)
                    throw new IOError(new IOException("Unable to read row key from data file"));
                if (currentIndexKey != null && !key.getKey().equals(currentIndexKey)) {
                    throw new IOError(new IOException(String.format("Key from data file (%s) does not match key from index file (%s)", //ByteBufferUtil.bytesToHex(key.getKey()), ByteBufferUtil.bytesToHex(currentIndexKey))));
                    "_too big_", ByteBufferUtil.bytesToHex(currentIndexKey))));
                }
                if (indexFile != null && dataSizeFromIndex > dataFile.length())
                    throw new IOError(new IOException("Impossible row size (greater than file length): " + dataSizeFromIndex));
                if (indexFile != null && dataStart != dataStartFromIndex)
                    outputHandler.warn(String.format("Data file row position %d differs from index file row position %d", dataStart, dataStartFromIndex));
                if (tryAppend(prevKey, key, writer))
                    prevKey = key;
            } catch (Throwable th) {
                throwIfFatal(th);
                outputHandler.warn("Error reading row (stacktrace follows):", th);
                if (currentIndexKey != null && (key == null || !key.getKey().equals(currentIndexKey) || dataStart != dataStartFromIndex)) {
                    outputHandler.output(String.format("Retrying from row index; data is %s bytes starting at %s", dataSizeFromIndex, dataStartFromIndex));
                    key = sstable.decorateKey(currentIndexKey);
                    try {
                        dataFile.seek(dataStartFromIndex);
                        if (tryAppend(prevKey, key, writer))
                            prevKey = key;
                    } catch (Throwable th2) {
                        throwIfFatal(th2);
                        throwIfCannotContinue(key, th2);
                        outputHandler.warn("Retry failed too. Skipping to next row (retry's stacktrace follows)", th2);
                        badRows++;
                        seekToNextRow();
                    }
                } else {
                    throwIfCannotContinue(key, th);
                    outputHandler.warn("Row starting at position " + dataStart + " is unreadable; skipping to next");
                    badRows++;
                    if (currentIndexKey != null)
                        seekToNextRow();
                }
            }
        }
        if (!outOfOrder.isEmpty()) {
            // out of order rows, but no bad rows found - we can keep our repairedAt time
            long repairedAt = badRows > 0 ? ActiveRepairService.UNREPAIRED_SSTABLE : metadata.repairedAt;
            SSTableReader newInOrderSstable;
            try (SSTableWriter inOrderWriter = CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize, repairedAt, metadata.pendingRepair, sstable, transaction)) {
                for (Partition partition : outOfOrder) inOrderWriter.append(partition.unfilteredIterator());
                newInOrderSstable = inOrderWriter.finish(-1, sstable.maxDataAge, true);
            }
            transaction.update(newInOrderSstable, false);
            finished.add(newInOrderSstable);
            outputHandler.warn(String.format("%d out of order rows found while scrubbing %s; Those have been written (in order) to a new sstable (%s)", outOfOrder.size(), sstable, newInOrderSstable));
        }
        // finish obsoletes the old sstable
        finished.addAll(writer.setRepairedAt(badRows > 0 ? ActiveRepairService.UNREPAIRED_SSTABLE : sstable.getSSTableMetadata().repairedAt).finish());
        completed = true;
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } finally {
        if (transaction.isOffline())
            finished.forEach(sstable -> sstable.selfRef().release());
    }
    if (completed) {
        outputHandler.output("Scrub of " + sstable + " complete: " + goodRows + " rows in new sstable and " + emptyRows + " empty (tombstoned) rows dropped");
        if (badRows > 0)
            outputHandler.warn("Unable to recover " + badRows + " rows that were skipped.  You can attempt manual recovery from the pre-scrub snapshot.  You can also run nodetool repair to transfer the data from a healthy replica, if any");
    } else {
        if (badRows > 0)
            outputHandler.warn("No valid rows found while scrubbing " + sstable + "; it is marked for deletion now. If you want to attempt manual recovery, you can find a copy in the pre-scrub snapshot");
        else
            outputHandler.output("Scrub of " + sstable + " complete; looks like all " + emptyRows + " rows were tombstoned");
    }
}
Also used : StatsMetadata(org.apache.cassandra.io.sstable.metadata.StatsMetadata) java.util(java.util) org.apache.cassandra.io.sstable(org.apache.cassandra.io.sstable) org.apache.cassandra.db(org.apache.cassandra.db) Throwables(com.google.common.base.Throwables) org.apache.cassandra.utils(org.apache.cassandra.utils) ByteBuffer(java.nio.ByteBuffer) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) org.apache.cassandra.db.rows(org.apache.cassandra.db.rows) ActiveRepairService(org.apache.cassandra.service.ActiveRepairService) java.io(java.io) LifecycleTransaction(org.apache.cassandra.db.lifecycle.LifecycleTransaction) FileUtils(org.apache.cassandra.io.util.FileUtils) TableMetadata(org.apache.cassandra.schema.TableMetadata) RandomAccessReader(org.apache.cassandra.io.util.RandomAccessReader) VisibleForTesting(com.google.common.annotations.VisibleForTesting) StatsMetadata(org.apache.cassandra.io.sstable.metadata.StatsMetadata) SSTableWriter(org.apache.cassandra.io.sstable.format.SSTableWriter) org.apache.cassandra.db.partitions(org.apache.cassandra.db.partitions) SSTableWriter(org.apache.cassandra.io.sstable.format.SSTableWriter) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader)
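
The scrub loop above follows one pattern throughout: a single SSTableRewriter fronts the output, every partition that survives validation is appended to whichever SSTableWriter is currently switched in, and finish() seals the result while the transaction obsoletes the original. A minimal sketch of that flow, not the scrubber itself (cfs, transaction, destination, expectedBloomFilterSize, metadata, sstable and the partitions iterator are assumed to be in scope, as in the snippet):

// Minimal sketch of the rewriter flow scrub() relies on; assumed variables as noted above.
private List<SSTableReader> rewriteAll(UnfilteredPartitionIterator partitions)
{
    try (SSTableRewriter rewriter = SSTableRewriter.construct(cfs, transaction, false, sstable.maxDataAge))
    {
        // point the rewriter at a fresh writer, mirroring the createWriter call in scrub()
        rewriter.switchWriter(CompactionManager.createWriter(cfs, destination, expectedBloomFilterSize,
                                                             metadata.repairedAt, metadata.pendingRepair,
                                                             sstable, transaction));
        while (partitions.hasNext())
            rewriter.append(partitions.next());   // one append per partition
        // finish() seals the new sstable(s); the transaction handles obsoleting the originals
        return rewriter.finish();
    }
}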

Example 3 with SSTableWriter

use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.

the class SplittingSizeTieredCompactionWriter method switchCompactionLocation.

@Override
public void switchCompactionLocation(Directories.DataDirectory location) {
    this.location = location;
    long currentPartitionsToWrite = Math.round(ratios[currentRatioIndex] * estimatedTotalKeys);
    @SuppressWarnings("resource")
    SSTableWriter writer = SSTableWriter.create(cfs.newSSTableDescriptor(getDirectories().getLocationForDisk(location)),
                                                currentPartitionsToWrite,
                                                minRepairedAt,
                                                pendingRepair,
                                                cfs.metadata,
                                                new MetadataCollector(allSSTables, cfs.metadata().comparator, 0),
                                                SerializationHeader.make(cfs.metadata(), nonExpiredSSTables),
                                                cfs.indexManager.listIndexes(),
                                                txn);
    logger.trace("Switching writer, currentPartitionsToWrite = {}", currentPartitionsToWrite);
    sstableWriter.switchWriter(writer);
}
Also used : SSTableWriter(org.apache.cassandra.io.sstable.format.SSTableWriter) MetadataCollector(org.apache.cassandra.io.sstable.metadata.MetadataCollector)
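
The only real difference from Example 1 is that each switched writer gets its own partition estimate, taken from a ratios array. A small illustration of that arithmetic (the halving ratios and the key count below are assumptions for the example; the real values come from the writer itself):

// Illustration only: turning a ratios array into per-writer partition estimates.
double[] ratios = { 0.5, 0.25, 0.125, 0.125 };   // assumed split; the fractions should sum to 1.0
long estimatedTotalKeys = 1_000_000;              // assumed total partition count
for (int i = 0; i < ratios.length; i++)
{
    long currentPartitionsToWrite = Math.round(ratios[i] * estimatedTotalKeys);
    System.out.println("writer " + i + " sized for ~" + currentPartitionsToWrite + " partitions");
}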

Example 4 with SSTableWriter

use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.

the class SSTableRewriter method doPrepare.

protected void doPrepare() {
    switchWriter(null);
    if (throwEarly)
        throw new RuntimeException("exception thrown early in finish, for testing");
    // No early open to finalize and replace
    for (SSTableWriter writer : writers) {
        assert writer.getFilePointer() > 0;
        writer.setRepairedAt(repairedAt).setOpenResult(true).prepareToCommit();
        SSTableReader reader = writer.finished();
        transaction.update(reader, false);
        preparedForCommit.add(reader);
    }
    transaction.checkpoint();
    if (throwLate)
        throw new RuntimeException("exception thrown after all sstables finished, for testing");
    if (!keepOriginals)
        transaction.obsoleteOriginals();
    transaction.prepareToCommit();
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) SSTableWriter(org.apache.cassandra.io.sstable.format.SSTableWriter)
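
doPrepare() is the first half of the rewriter's two-phase commit: it closes out every remaining writer, registers the finished readers with the transaction, and (unless keepOriginals is set) obsoletes the originals, but nothing is permanent until commit. From the caller's side the protocol looks roughly like this sketch (rewriter is assumed to be an already-populated SSTableRewriter):

// Rough sketch of driving the two-phase protocol; assumes `rewriter` is in scope.
try
{
    rewriter.prepareToCommit();   // runs doPrepare(): finish writers, checkpoint, obsolete originals
    rewriter.commit();            // make the new sstables permanent
}
catch (Throwable t)
{
    rewriter.abort();             // roll back: the new sstables are discarded, the originals kept
    throw t;
}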

Example 5 with SSTableWriter

use of org.apache.cassandra.io.sstable.format.SSTableWriter in project cassandra by apache.

the class SSTableWriterTest method testAbortTxnWithClosedWriterShouldRemoveSSTable.

@Test
public void testAbortTxnWithClosedWriterShouldRemoveSSTable() throws InterruptedException {
    Keyspace keyspace = Keyspace.open(KEYSPACE);
    ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(CF);
    truncate(cfs);
    File dir = cfs.getDirectories().getDirectoryForNewSSTables();
    LifecycleTransaction txn = LifecycleTransaction.offline(OperationType.STREAM);
    try (SSTableWriter writer = getWriter(cfs, dir, txn)) {
        for (int i = 0; i < 10000; i++) {
            UpdateBuilder builder = UpdateBuilder.create(cfs.metadata(), random(i, 10)).withTimestamp(1);
            for (int j = 0; j < 100; j++) builder.newRow("" + j).add("val", ByteBuffer.allocate(1000));
            writer.append(builder.build().unfilteredIterator());
        }
        assertFileCounts(dir.list());
        for (int i = 10000; i < 20000; i++) {
            UpdateBuilder builder = UpdateBuilder.create(cfs.metadata(), random(i, 10)).withTimestamp(1);
            for (int j = 0; j < 100; j++) builder.newRow("" + j).add("val", ByteBuffer.allocate(1000));
            writer.append(builder.build().unfilteredIterator());
        }
        SSTableReader sstable = writer.finish(true);
        int datafiles = assertFileCounts(dir.list());
        assertEquals(datafiles, 1);
        sstable.selfRef().release();
        // these checks don't work on Windows because the writer still has the channel
        // open till .abort() is called (via the builder)
        if (!FBUtilities.isWindows) {
            LifecycleTransaction.waitForDeletions();
            assertFileCounts(dir.list());
        }
        txn.abort();
        LifecycleTransaction.waitForDeletions();
        datafiles = assertFileCounts(dir.list());
        assertEquals(datafiles, 0);
        validateCFS(cfs);
    }
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) SSTableWriter(org.apache.cassandra.io.sstable.format.SSTableWriter) LifecycleTransaction(org.apache.cassandra.db.lifecycle.LifecycleTransaction) File(java.io.File) Test(org.junit.Test)
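
The test exercises the failure path: the writer is finished and even opened as a reader, yet aborting the surrounding transaction removes the data files again. The happy-path counterpart would instead hand the finished reader to the transaction and commit it, roughly as sketched below with the same variables (illustration only, not part of the test):

// Counterpart sketch, not part of the test: keep the sstable by committing the transaction.
SSTableReader sstable = writer.finish(true);   // seal the sstable and open a reader on it
txn.update(sstable, false);                    // the transaction now tracks the new reader
txn.finish();                                  // commit: the data files stay on disk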

Aggregations

SSTableWriter (org.apache.cassandra.io.sstable.format.SSTableWriter)9 SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader)6 LifecycleTransaction (org.apache.cassandra.db.lifecycle.LifecycleTransaction)5 File (java.io.File)4 Test (org.junit.Test)4 MetadataCollector (org.apache.cassandra.io.sstable.metadata.MetadataCollector)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Throwables (com.google.common.base.Throwables)1 java.io (java.io)1 ByteBuffer (java.nio.ByteBuffer)1 java.util (java.util)1 org.apache.cassandra.db (org.apache.cassandra.db)1 org.apache.cassandra.db.partitions (org.apache.cassandra.db.partitions)1 org.apache.cassandra.db.rows (org.apache.cassandra.db.rows)1 org.apache.cassandra.io.sstable (org.apache.cassandra.io.sstable)1 StatsMetadata (org.apache.cassandra.io.sstable.metadata.StatsMetadata)1 FileUtils (org.apache.cassandra.io.util.FileUtils)1 RandomAccessReader (org.apache.cassandra.io.util.RandomAccessReader)1 TableMetadata (org.apache.cassandra.schema.TableMetadata)1 ActiveRepairService (org.apache.cassandra.service.ActiveRepairService)1