Use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.
The class CompactionManager, method doValidationCompaction:
/**
 * Performs a read-only "compaction" of all sstables in order to validate complete rows,
 * but without writing the merge result.
 */
@SuppressWarnings("resource")
private void doValidationCompaction(ColumnFamilyStore cfs, Validator validator) throws IOException {
    // Bail out if the table was dropped while this validation was queued
    // concurrently with other compactions; it would otherwise go ahead and scan those sstables again.
    if (!cfs.isValid())
        return;
    Refs<SSTableReader> sstables = null;
    try {
        int gcBefore;
        int nowInSec = FBUtilities.nowInSeconds();
        UUID parentRepairSessionId = validator.desc.parentSessionId;
        String snapshotName;
        boolean isGlobalSnapshotValidation = cfs.snapshotExists(parentRepairSessionId.toString());
        if (isGlobalSnapshotValidation)
            snapshotName = parentRepairSessionId.toString();
        else
            snapshotName = validator.desc.sessionId.toString();
        boolean isSnapshotValidation = cfs.snapshotExists(snapshotName);
        if (isSnapshotValidation) {
            // If there is a snapshot created for the session, read from there.
            // Note that we populate the parent repair session when creating the snapshot,
            // meaning the sstables in the snapshot are the ones we are supposed to validate.
            sstables = cfs.getSnapshotSSTableReaders(snapshotName);
            // Computing gcBefore based on the current time wouldn't be very good, because we know each replica
            // will execute this at a different time (that's the whole purpose of repair with snapshot). So instead
            // we take the creation time of the snapshot, which should give us roughly the same time on each replica
            // (roughly being, in that case, 'as good as in the non-snapshot case').
            gcBefore = cfs.gcBefore((int) (cfs.getSnapshotCreationTime(snapshotName) / 1000));
        } else {
            if (!validator.isConsistent) {
                // Flush first so everyone is validating data that is as similar as possible.
                StorageService.instance.forceKeyspaceFlush(cfs.keyspace.getName(), cfs.name);
            }
            sstables = getSSTablesToValidate(cfs, validator);
            if (sstables == null)
                // The parent repair session was removed - the repair session failed on another node and we removed it.
                return;
            if (validator.gcBefore > 0)
                gcBefore = validator.gcBefore;
            else
                gcBefore = getDefaultGcBefore(cfs, nowInSec);
        }
        // Create Merkle trees suitable to hold estimated partitions for the given ranges.
        // We blindly assume that a partition is evenly distributed on all sstables for now.
        MerkleTrees tree = createMerkleTrees(sstables, validator.desc.ranges, cfs);
        long start = System.nanoTime();
        try (AbstractCompactionStrategy.ScannerList scanners = cfs.getCompactionStrategyManager().getScanners(sstables, validator.desc.ranges);
             ValidationCompactionController controller = new ValidationCompactionController(cfs, gcBefore);
             CompactionIterator ci = new ValidationCompactionIterator(scanners.scanners, controller, nowInSec, metrics)) {
            // Validate the table as we iterate over it.
            validator.prepare(cfs, tree);
            while (ci.hasNext()) {
                if (ci.isStopRequested())
                    throw new CompactionInterruptedException(ci.getCompactionInfo());
                try (UnfilteredRowIterator partition = ci.next()) {
                    validator.add(partition);
                }
            }
            validator.complete();
        } finally {
            if (isSnapshotValidation && !isGlobalSnapshotValidation) {
                // We can only clear the snapshot if we are not doing a global snapshot validation
                // (in that case we clear it once anticompaction is done).
                cfs.clearSnapshot(snapshotName);
            }
        }
        if (logger.isDebugEnabled()) {
            long duration = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
            logger.debug("Validation finished in {} msec, for {}", duration, validator.desc);
        }
    } finally {
        if (sstables != null)
            sstables.release();
    }
}
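The inner loop is the canonical way to consume a CompactionIterator: each call to next() hands back an UnfilteredRowIterator that holds resources and must be closed before advancing. A minimal sketch of that consumption idiom in plain Java, using an illustrative stand-in type (ClosablePartition is hypothetical, not Cassandra's interface):

import java.util.Iterator;

// Stand-in for a partition iterator that owns resources and must be closed after use;
// illustrative only, not Cassandra's UnfilteredRowIterator.
interface ClosablePartition extends AutoCloseable {
    String key();
    @Override
    void close(); // narrowed so close() throws no checked exception, as in Cassandra
}

public class PartitionConsumeSketch {
    static void consumeAll(Iterator<ClosablePartition> partitions) {
        while (partitions.hasNext()) {
            // try-with-resources closes each partition even if processing throws, mirroring
            // `try (UnfilteredRowIterator partition = ci.next()) { validator.add(partition); }`
            try (ClosablePartition partition = partitions.next()) {
                System.out.println("validating partition " + partition.key());
            }
        }
    }
}

Closing each partition inside the loop keeps resource lifetimes bounded to a single partition, which matters when validating large tables.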
Use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.
The class CompactionManager, method antiCompactGroup:
private int antiCompactGroup(ColumnFamilyStore cfs, Collection<Range<Token>> ranges, LifecycleTransaction anticompactionGroup, long repairedAt, UUID pendingRepair) {
    long groupMaxDataAge = -1;
    for (Iterator<SSTableReader> i = anticompactionGroup.originals().iterator(); i.hasNext(); ) {
        SSTableReader sstable = i.next();
        if (groupMaxDataAge < sstable.maxDataAge)
            groupMaxDataAge = sstable.maxDataAge;
    }
    if (anticompactionGroup.originals().size() == 0) {
        logger.info("No valid anticompactions for this group; all sstables were compacted and are no longer available");
        return 0;
    }
    logger.info("Anticompacting {}", anticompactionGroup);
    Set<SSTableReader> sstableAsSet = anticompactionGroup.originals();
    File destination = cfs.getDirectories().getWriteableLocationAsFile(cfs.getExpectedCompactedFileSize(sstableAsSet, OperationType.ANTICOMPACTION));
    long repairedKeyCount = 0;
    long unrepairedKeyCount = 0;
    int nowInSec = FBUtilities.nowInSeconds();
    CompactionStrategyManager strategy = cfs.getCompactionStrategyManager();
    try (SSTableRewriter repairedSSTableWriter = SSTableRewriter.constructWithoutEarlyOpening(anticompactionGroup, false, groupMaxDataAge);
         SSTableRewriter unRepairedSSTableWriter = SSTableRewriter.constructWithoutEarlyOpening(anticompactionGroup, false, groupMaxDataAge);
         AbstractCompactionStrategy.ScannerList scanners = strategy.getScanners(anticompactionGroup.originals());
         CompactionController controller = new CompactionController(cfs, sstableAsSet, getDefaultGcBefore(cfs, nowInSec));
         CompactionIterator ci = new CompactionIterator(OperationType.ANTICOMPACTION, scanners.scanners, controller, nowInSec, UUIDGen.getTimeUUID(), metrics)) {
        int expectedBloomFilterSize = Math.max(cfs.metadata().params.minIndexInterval, (int) (SSTableReader.getApproximateKeyCount(sstableAsSet)));
        repairedSSTableWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, repairedAt, pendingRepair, sstableAsSet, anticompactionGroup));
        unRepairedSSTableWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, ActiveRepairService.UNREPAIRED_SSTABLE, null, sstableAsSet, anticompactionGroup));
        Range.OrderedRangeContainmentChecker containmentChecker = new Range.OrderedRangeContainmentChecker(ranges);
        while (ci.hasNext()) {
            try (UnfilteredRowIterator partition = ci.next()) {
                // If the current partition falls within the repaired ranges, save it into the new
                // repaired sstable; otherwise save it into the new 'non-repaired' sstable.
                if (containmentChecker.contains(partition.partitionKey().getToken())) {
                    repairedSSTableWriter.append(partition);
                    repairedKeyCount++;
                } else {
                    unRepairedSSTableWriter.append(partition);
                    unrepairedKeyCount++;
                }
            }
        }
        List<SSTableReader> anticompactedSSTables = new ArrayList<>();
        // Since both writers are operating over the same Transaction, we cannot use the convenience
        // Transactional.finish() method, as on the second finish() we would prepareToCommit() on a Transaction
        // that has already been committed, which is forbidden by the API (since it indicates misuse). We call
        // permitRedundantTransitions so that calls that transition to a state already occupied are permitted.
        anticompactionGroup.permitRedundantTransitions();
        repairedSSTableWriter.setRepairedAt(repairedAt).prepareToCommit();
        unRepairedSSTableWriter.prepareToCommit();
        anticompactedSSTables.addAll(repairedSSTableWriter.finished());
        anticompactedSSTables.addAll(unRepairedSSTableWriter.finished());
        repairedSSTableWriter.commit();
        unRepairedSSTableWriter.commit();
        logger.trace("Repaired {} keys out of {} for {}/{} in {}", repairedKeyCount, repairedKeyCount + unrepairedKeyCount, cfs.keyspace.getName(), cfs.getTableName(), anticompactionGroup);
        return anticompactedSSTables.size();
    } catch (Throwable e) {
        JVMStabilityInspector.inspectThrowable(e);
        logger.error("Error anticompacting " + anticompactionGroup, e);
    }
    return 0;
}
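The core of the method is the routing decision inside the loop: every partition is appended to exactly one of the two writers, depending on whether its token falls within the repaired ranges. A plain-Java sketch of that split, with hypothetical stand-ins for the containment check and the two writers (not Cassandra API):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.Predicate;

// Illustrative stand-in for the two-writer routing in antiCompactGroup.
public class AntiCompactionRoutingSketch {
    // Routes each record to exactly one of two outputs; returns {repaired, unrepaired} counts.
    static <T> long[] split(Iterator<T> records, Predicate<T> inRepairedRange,
                            List<T> repairedOut, List<T> unrepairedOut) {
        long repaired = 0, unrepaired = 0;
        while (records.hasNext()) {
            T record = records.next();
            if (inRepairedRange.test(record)) {   // cf. containmentChecker.contains(token)
                repairedOut.add(record);          // cf. repairedSSTableWriter.append(partition)
                repaired++;
            } else {
                unrepairedOut.add(record);        // cf. unRepairedSSTableWriter.append(partition)
                unrepaired++;
            }
        }
        return new long[] { repaired, unrepaired };
    }

    public static void main(String[] args) {
        List<Integer> repaired = new ArrayList<>();
        List<Integer> unrepaired = new ArrayList<>();
        long[] counts = split(List.of(1, 5, 12, 42).iterator(), n -> n < 10, repaired, unrepaired);
        System.out.printf("repaired=%d unrepaired=%d%n", counts[0], counts[1]);
    }
}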
Use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.
The class Verifier, method verify:
public void verify(boolean extended) throws IOException {
    long rowStart = 0;
    outputHandler.output(String.format("Verifying %s (%s)", sstable, FBUtilities.prettyPrintMemory(dataFile.length())));
    outputHandler.output(String.format("Checking computed hash of %s ", sstable));
    // Verify will use the Digest files, which works for both compressed and uncompressed sstables.
    try {
        validator = null;
        if (new File(sstable.descriptor.filenameFor(Component.DIGEST)).exists()) {
            validator = DataIntegrityMetadata.fileDigestValidator(sstable.descriptor);
            validator.validate();
        } else {
            outputHandler.output("Data digest missing, assuming extended verification of disk values");
            extended = true;
        }
    } catch (IOException e) {
        outputHandler.debug(e.getMessage());
        markAndThrow();
    } finally {
        FileUtils.closeQuietly(validator);
    }
    if (!extended)
        return;
    outputHandler.output("Extended Verify requested, proceeding to inspect values");
    try {
        ByteBuffer nextIndexKey = ByteBufferUtil.readWithShortLength(indexFile);
        {
            long firstRowPositionFromIndex = rowIndexEntrySerializer.deserializePositionAndSkip(indexFile);
            if (firstRowPositionFromIndex != 0)
                markAndThrow();
        }
        DecoratedKey prevKey = null;
        while (!dataFile.isEOF()) {
            if (verifyInfo.isStopRequested())
                throw new CompactionInterruptedException(verifyInfo.getCompactionInfo());
            rowStart = dataFile.getFilePointer();
            outputHandler.debug("Reading row at " + rowStart);
            DecoratedKey key = null;
            try {
                key = sstable.decorateKey(ByteBufferUtil.readWithShortLength(dataFile));
            } catch (Throwable th) {
                throwIfFatal(th);
                // Check for null key below.
            }
            ByteBuffer currentIndexKey = nextIndexKey;
            long nextRowPositionFromIndex = 0;
            try {
                nextIndexKey = indexFile.isEOF() ? null : ByteBufferUtil.readWithShortLength(indexFile);
                nextRowPositionFromIndex = indexFile.isEOF() ? dataFile.length() : rowIndexEntrySerializer.deserializePositionAndSkip(indexFile);
            } catch (Throwable th) {
                markAndThrow();
            }
            long dataStart = dataFile.getFilePointer();
            // The data for a partition starts after its key: a 2-byte length prefix plus the key bytes.
            long dataStartFromIndex = currentIndexKey == null ? -1 : rowStart + 2 + currentIndexKey.remaining();
            long dataSize = nextRowPositionFromIndex - dataStartFromIndex;
            // Avoid an NPE if key is null.
            String keyName = key == null ? "(unreadable key)" : ByteBufferUtil.bytesToHex(key.getKey());
            outputHandler.debug(String.format("row %s is %s", keyName, FBUtilities.prettyPrintMemory(dataSize)));
            assert currentIndexKey != null || indexFile.isEOF();
            try {
                if (key == null || dataSize > dataFile.length())
                    markAndThrow();
                // Mimic the scrub read path: opening (and closing) the iterator exercises
                // deserialization, so corruption surfaces here even though the body is empty.
                try (UnfilteredRowIterator iterator = SSTableIdentityIterator.create(sstable, dataFile, key)) {
                }
                if ((prevKey != null && prevKey.compareTo(key) > 0) || !key.getKey().equals(currentIndexKey) || dataStart != dataStartFromIndex)
                    markAndThrow();
                goodRows++;
                prevKey = key;
                outputHandler.debug(String.format("Row %s at %s valid, moving to next row at %s ", goodRows, rowStart, nextRowPositionFromIndex));
                dataFile.seek(nextRowPositionFromIndex);
            } catch (Throwable th) {
                badRows++;
                markAndThrow();
            }
        }
    } catch (Throwable t) {
        throw Throwables.propagate(t);
    } finally {
        controller.close();
    }
    outputHandler.output("Verify of " + sstable + " succeeded. All " + goodRows + " rows read successfully");
}
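Note the empty try-with-resources block in the read-path check above: constructing the iterator is itself the test, since deserialization failures surface as exceptions at open time. A small plain-Java sketch of that probe idiom; RowIterator and open() below are hypothetical stand-ins for SSTableIdentityIterator.create, not Cassandra API:

// Hypothetical stand-in types modeling the "open to validate" probe.
public class DeserializationProbeSketch {
    interface RowIterator extends AutoCloseable {
        @Override
        void close();
    }

    // Stands in for SSTableIdentityIterator.create: constructing the iterator reads the
    // partition header, so corruption is reported at open time.
    static RowIterator open(byte[] rawPartition) {
        if (rawPartition.length == 0)
            throw new IllegalStateException("truncated partition");
        return () -> { };
    }

    static boolean isReadable(byte[] rawPartition) {
        // Opening (and closing) the iterator is the whole check; no rows are read, mirroring
        // `try (UnfilteredRowIterator iterator = SSTableIdentityIterator.create(...)) { }`
        try (RowIterator ignored = open(rawPartition)) {
            return true;
        } catch (RuntimeException e) {
            return false;
        }
    }
}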
Use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.
The class CassandraIndexSearcher, method search:
// Both the OpOrder and 'indexIter' are closed on exception, or through the closing of the result
// of this method.
@SuppressWarnings("resource")
public UnfilteredPartitionIterator search(ReadExecutionController executionController) {
    // The value of the index expression is the partition key in the index table.
    DecoratedKey indexKey = index.getBackingTable().get().decorateKey(expression.getIndexValue());
    UnfilteredRowIterator indexIter = queryIndex(indexKey, command, executionController);
    try {
        return queryDataFromIndex(indexKey, UnfilteredRowIterators.filter(indexIter, command.nowInSec()), command, executionController);
    } catch (RuntimeException | Error e) {
        indexIter.close();
        throw e;
    }
}
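The try/catch here implements a close-on-exception hand-off: if queryDataFromIndex returns normally, the returned iterator owns indexIter and the caller closes both together; if it throws, this method must close indexIter itself. A plain-Java sketch of the idiom, with hypothetical stand-in types (Resource and decorate are illustrative, not Cassandra API):

// Illustrative stand-in for the close-on-exception hand-off in search().
public class OwnershipTransferSketch {
    interface Resource extends AutoCloseable {
        @Override
        void close();
    }

    // Stands in for queryDataFromIndex: wraps `inner` so that closing the result closes it too.
    static Resource decorate(Resource inner) {
        return inner::close;
    }

    static Resource wrap(Resource inner) {
        try {
            // On success, ownership of `inner` transfers to the returned wrapper;
            // the caller is now responsible for closing it.
            return decorate(inner);
        } catch (RuntimeException | Error e) {
            // On failure, nothing was returned, so close `inner` here
            // (cf. indexIter.close() in the catch block above).
            inner.close();
            throw e;
        }
    }
}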
Use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.
The class LongLeveledCompactionStrategyTest, method testLeveledScanner:
@Test
public void testLeveledScanner() throws Exception {
    Keyspace keyspace = Keyspace.open(KEYSPACE1);
    ColumnFamilyStore store = keyspace.getColumnFamilyStore(CF_STANDARDLVL2);
    // 100 KB value, to make it easy to have multiple files.
    ByteBuffer value = ByteBuffer.wrap(new byte[100 * 1024]);
    // Enough data to have a level 1 and 2.
    int rows = 128;
    int columns = 10;
    // Add enough data to trigger multiple sstables per level.
    for (int r = 0; r < rows; r++) {
        DecoratedKey key = Util.dk(String.valueOf(r));
        UpdateBuilder builder = UpdateBuilder.create(store.metadata(), key);
        for (int c = 0; c < columns; c++)
            builder.newRow("column" + c).add("val", value);
        Mutation rm = new Mutation(builder.build());
        rm.apply();
        store.forceBlockingFlush();
    }
    LeveledCompactionStrategyTest.waitForLeveling(store);
    store.disableAutoCompaction();
    CompactionStrategyManager mgr = store.getCompactionStrategyManager();
    LeveledCompactionStrategy lcs = (LeveledCompactionStrategy) mgr.getStrategies().get(1).get(0);
    // 10 KB value.
    value = ByteBuffer.wrap(new byte[10 * 1024]);
    // Add 10 partitions.
    for (int r = 0; r < 10; r++) {
        DecoratedKey key = Util.dk(String.valueOf(r));
        UpdateBuilder builder = UpdateBuilder.create(store.metadata(), key);
        for (int c = 0; c < 10; c++)
            builder.newRow("column" + c).add("val", value);
        Mutation rm = new Mutation(builder.build());
        rm.apply();
    }
    // Flush the sstable.
    store.forceBlockingFlush();
    store.runWithCompactionsDisabled(new Callable<Void>() {
        public Void call() throws Exception {
            Iterable<SSTableReader> allSSTables = store.getSSTables(SSTableSet.LIVE);
            for (SSTableReader sstable : allSSTables) {
                if (sstable.getSSTableLevel() == 0) {
                    System.out.println("Mutating L0-SSTABLE level to L1 to simulate a bug: " + sstable.getFilename());
                    sstable.descriptor.getMetadataSerializer().mutateLevel(sstable.descriptor, 1);
                    sstable.reloadSSTableMetadata();
                }
            }
            try (AbstractCompactionStrategy.ScannerList scannerList = lcs.getScanners(Lists.newArrayList(allSSTables))) {
                // Verify that leveled scanners will always iterate in ascending order (CASSANDRA-9935).
                for (ISSTableScanner scanner : scannerList.scanners) {
                    DecoratedKey lastKey = null;
                    while (scanner.hasNext()) {
                        UnfilteredRowIterator row = scanner.next();
                        if (lastKey != null) {
                            assertTrue("row " + row.partitionKey() + " received out of order wrt " + lastKey, row.partitionKey().compareTo(lastKey) >= 0);
                        }
                        lastKey = row.partitionKey();
                    }
                }
            }
            return null;
        }
    }, true, true);
}
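The assertion in the scanner loop checks a single invariant: partition keys come back in non-descending order, even after the sstable levels have been tampered with. The same check, reduced to a plain-Java sketch with stand-in types (not Cassandra API):

import java.util.Iterator;
import java.util.List;

// Plain-Java sketch of the property the test asserts (CASSANDRA-9935): a scanner must
// yield partition keys in non-descending order.
public class AscendingScanSketch {
    static <K extends Comparable<K>> void assertAscending(Iterator<K> partitionKeys) {
        K lastKey = null;
        while (partitionKeys.hasNext()) {
            K key = partitionKeys.next();
            // cf. assertTrue("row ... received out of order wrt ...", key.compareTo(lastKey) >= 0)
            if (lastKey != null && key.compareTo(lastKey) < 0)
                throw new AssertionError("key " + key + " received out of order wrt " + lastKey);
            lastKey = key;
        }
    }

    public static void main(String[] args) {
        assertAscending(List.of("a", "b", "b", "c").iterator()); // passes: non-descending
    }
}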