
Example 1 with UnfilteredRowIterator

use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.

the class CompactionManager method doValidationCompaction.

/**
     * Performs a readonly "compaction" of all sstables in order to validate complete rows,
     * but without writing the merge result
     */
@SuppressWarnings("resource")
private void doValidationCompaction(ColumnFamilyStore cfs, Validator validator) throws IOException {
    // skip if the column family store has been invalidated (e.g. the table was dropped); a validation submitted
    // concurrently with other compactions would otherwise go ahead and scan those sstables again.
    if (!cfs.isValid())
        return;
    Refs<SSTableReader> sstables = null;
    try {
        int gcBefore;
        int nowInSec = FBUtilities.nowInSeconds();
        UUID parentRepairSessionId = validator.desc.parentSessionId;
        String snapshotName;
        boolean isGlobalSnapshotValidation = cfs.snapshotExists(parentRepairSessionId.toString());
        if (isGlobalSnapshotValidation)
            snapshotName = parentRepairSessionId.toString();
        else
            snapshotName = validator.desc.sessionId.toString();
        boolean isSnapshotValidation = cfs.snapshotExists(snapshotName);
        if (isSnapshotValidation) {
            // If there is a snapshot created for the session then read from there.
            // note that we populate the parent repair session when creating the snapshot, meaning the sstables in the snapshot are the ones we
            // are supposed to validate.
            sstables = cfs.getSnapshotSSTableReaders(snapshotName);
            // Computing gcbefore based on the current time wouldn't be very good because we know each replica will execute
            // this at a different time (that's the whole purpose of repair with snapshot). So instead we take the creation
            // time of the snapshot, which should give us roughly the same time on each replica (roughly being in that case
            // 'as good as in the non-snapshot' case)
            gcBefore = cfs.gcBefore((int) (cfs.getSnapshotCreationTime(snapshotName) / 1000));
        } else {
            if (!validator.isConsistent) {
                // flush first so everyone is validating data that is as similar as possible
                StorageService.instance.forceKeyspaceFlush(cfs.keyspace.getName(), cfs.name);
            }
            sstables = getSSTablesToValidate(cfs, validator);
            if (sstables == null)
                // this means the parent repair session was removed - the repair session failed on another node and we removed it
                return;
            if (validator.gcBefore > 0)
                gcBefore = validator.gcBefore;
            else
                gcBefore = getDefaultGcBefore(cfs, nowInSec);
        }
        // Create Merkle trees suitable to hold estimated partitions for the given ranges.
        // We blindly assume that a partition is evenly distributed on all sstables for now.
        MerkleTrees tree = createMerkleTrees(sstables, validator.desc.ranges, cfs);
        long start = System.nanoTime();
        try (AbstractCompactionStrategy.ScannerList scanners = cfs.getCompactionStrategyManager().getScanners(sstables, validator.desc.ranges);
            ValidationCompactionController controller = new ValidationCompactionController(cfs, gcBefore);
            CompactionIterator ci = new ValidationCompactionIterator(scanners.scanners, controller, nowInSec, metrics)) {
            // validate the CF as we iterate over it
            validator.prepare(cfs, tree);
            while (ci.hasNext()) {
                if (ci.isStopRequested())
                    throw new CompactionInterruptedException(ci.getCompactionInfo());
                try (UnfilteredRowIterator partition = ci.next()) {
                    validator.add(partition);
                }
            }
            validator.complete();
        } finally {
            if (isSnapshotValidation && !isGlobalSnapshotValidation) {
                // we can only clear the snapshot if we are not doing a global snapshot validation (we then clear it once anticompaction
                // is done).
                cfs.clearSnapshot(snapshotName);
            }
        }
        if (logger.isDebugEnabled()) {
            long duration = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
            logger.debug("Validation finished in {} msec, for {}", duration, validator.desc);
        }
    } finally {
        if (sstables != null)
            sstables.release();
    }
}
Also used : UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader)
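
A pattern common to all of these examples: an UnfilteredRowIterator is AutoCloseable and represents a single partition, yielding Unfiltered items (rows or range tombstone markers) in clustering order. A minimal consumption sketch, assuming ci is an open CompactionIterator as in the example above:

while (ci.hasNext()) {
    // each partition must be closed after use; try-with-resources guarantees it
    try (UnfilteredRowIterator partition = ci.next()) {
        DecoratedKey key = partition.partitionKey(); // partition-level metadata is available up front
        while (partition.hasNext()) {
            Unfiltered unfiltered = partition.next(); // either a Row or a RangeTombstoneMarker
            // inspect or transform the row/marker here
        }
    }
}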

Example 2 with UnfilteredRowIterator

use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.

the class CompactionManager method antiCompactGroup.

private int antiCompactGroup(ColumnFamilyStore cfs, Collection<Range<Token>> ranges, LifecycleTransaction anticompactionGroup, long repairedAt, UUID pendingRepair) {
    long groupMaxDataAge = -1;
    for (Iterator<SSTableReader> i = anticompactionGroup.originals().iterator(); i.hasNext(); ) {
        SSTableReader sstable = i.next();
        if (groupMaxDataAge < sstable.maxDataAge)
            groupMaxDataAge = sstable.maxDataAge;
    }
    if (anticompactionGroup.originals().size() == 0) {
        logger.info("No valid anticompactions for this group, All sstables were compacted and are no longer available");
        return 0;
    }
    logger.info("Anticompacting {}", anticompactionGroup);
    Set<SSTableReader> sstableAsSet = anticompactionGroup.originals();
    File destination = cfs.getDirectories().getWriteableLocationAsFile(cfs.getExpectedCompactedFileSize(sstableAsSet, OperationType.ANTICOMPACTION));
    long repairedKeyCount = 0;
    long unrepairedKeyCount = 0;
    int nowInSec = FBUtilities.nowInSeconds();
    CompactionStrategyManager strategy = cfs.getCompactionStrategyManager();
    try (SSTableRewriter repairedSSTableWriter = SSTableRewriter.constructWithoutEarlyOpening(anticompactionGroup, false, groupMaxDataAge);
        SSTableRewriter unRepairedSSTableWriter = SSTableRewriter.constructWithoutEarlyOpening(anticompactionGroup, false, groupMaxDataAge);
        AbstractCompactionStrategy.ScannerList scanners = strategy.getScanners(anticompactionGroup.originals());
        CompactionController controller = new CompactionController(cfs, sstableAsSet, getDefaultGcBefore(cfs, nowInSec));
        CompactionIterator ci = new CompactionIterator(OperationType.ANTICOMPACTION, scanners.scanners, controller, nowInSec, UUIDGen.getTimeUUID(), metrics)) {
        int expectedBloomFilterSize = Math.max(cfs.metadata().params.minIndexInterval, (int) (SSTableReader.getApproximateKeyCount(sstableAsSet)));
        repairedSSTableWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, repairedAt, pendingRepair, sstableAsSet, anticompactionGroup));
        unRepairedSSTableWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, ActiveRepairService.UNREPAIRED_SSTABLE, null, sstableAsSet, anticompactionGroup));
        Range.OrderedRangeContainmentChecker containmentChecker = new Range.OrderedRangeContainmentChecker(ranges);
        while (ci.hasNext()) {
            try (UnfilteredRowIterator partition = ci.next()) {
                // if current range from sstable is repaired, save it into the new repaired sstable
                if (containmentChecker.contains(partition.partitionKey().getToken())) {
                    repairedSSTableWriter.append(partition);
                    repairedKeyCount++;
                } else {
                    // otherwise save into the new 'non-repaired' table
                    unRepairedSSTableWriter.append(partition);
                    unrepairedKeyCount++;
                }
            }
        }
        List<SSTableReader> anticompactedSSTables = new ArrayList<>();
        // since both writers are operating over the same Transaction, we cannot use the convenience Transactional.finish() method,
        // as on the second finish() we would prepareToCommit() on a Transaction that has already been committed, which is forbidden by the API
        // (since it indicates misuse). We call permitRedundantTransitions so that calls that transition to a state already occupied are permitted.
        anticompactionGroup.permitRedundantTransitions();
        repairedSSTableWriter.setRepairedAt(repairedAt).prepareToCommit();
        unRepairedSSTableWriter.prepareToCommit();
        anticompactedSSTables.addAll(repairedSSTableWriter.finished());
        anticompactedSSTables.addAll(unRepairedSSTableWriter.finished());
        repairedSSTableWriter.commit();
        unRepairedSSTableWriter.commit();
        logger.trace("Repaired {} keys out of {} for {}/{} in {}", repairedKeyCount, repairedKeyCount + unrepairedKeyCount, cfs.keyspace.getName(), cfs.getTableName(), anticompactionGroup);
        return anticompactedSSTables.size();
    } catch (Throwable e) {
        JVMStabilityInspector.inspectThrowable(e);
        logger.error("Error anticompacting " + anticompactionGroup, e);
    }
    return 0;
}
Also used : UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) SSTableRewriter(org.apache.cassandra.io.sstable.SSTableRewriter) Range(org.apache.cassandra.dht.Range) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) File(java.io.File)
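
One detail worth calling out: Range.OrderedRangeContainmentChecker assumes the tokens it is asked about arrive in non-decreasing order (the compaction iterator scans in token order), which lets it walk the repaired ranges in a single pass instead of re-searching them for every partition. Distilled to just the routing decision, with the writers assumed open as in the example:

Range.OrderedRangeContainmentChecker checker = new Range.OrderedRangeContainmentChecker(ranges);
while (ci.hasNext()) {
    try (UnfilteredRowIterator partition = ci.next()) {
        if (checker.contains(partition.partitionKey().getToken()))
            repairedSSTableWriter.append(partition);   // token falls inside a repaired range
        else
            unRepairedSSTableWriter.append(partition); // token outside every repaired range
    }
}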

Example 3 with UnfilteredRowIterator

use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.

the class Verifier method verify.

public void verify(boolean extended) throws IOException {
    long rowStart = 0;
    outputHandler.output(String.format("Verifying %s (%s)", sstable, FBUtilities.prettyPrintMemory(dataFile.length())));
    outputHandler.output(String.format("Checking computed hash of %s ", sstable));
    // Verify will use the Digest files, which works for both compressed and uncompressed sstables
    try {
        validator = null;
        if (new File(sstable.descriptor.filenameFor(Component.DIGEST)).exists()) {
            validator = DataIntegrityMetadata.fileDigestValidator(sstable.descriptor);
            validator.validate();
        } else {
            outputHandler.output("Data digest missing, assuming extended verification of disk values");
            extended = true;
        }
    } catch (IOException e) {
        outputHandler.debug(e.getMessage());
        markAndThrow();
    } finally {
        FileUtils.closeQuietly(validator);
    }
    if (!extended)
        return;
    outputHandler.output("Extended Verify requested, proceeding to inspect values");
    try {
        ByteBuffer nextIndexKey = ByteBufferUtil.readWithShortLength(indexFile);
        {
            long firstRowPositionFromIndex = rowIndexEntrySerializer.deserializePositionAndSkip(indexFile);
            if (firstRowPositionFromIndex != 0)
                markAndThrow();
        }
        DecoratedKey prevKey = null;
        while (!dataFile.isEOF()) {
            if (verifyInfo.isStopRequested())
                throw new CompactionInterruptedException(verifyInfo.getCompactionInfo());
            rowStart = dataFile.getFilePointer();
            outputHandler.debug("Reading row at " + rowStart);
            DecoratedKey key = null;
            try {
                key = sstable.decorateKey(ByteBufferUtil.readWithShortLength(dataFile));
            } catch (Throwable th) {
                throwIfFatal(th);
                // check for null key below
            }
            ByteBuffer currentIndexKey = nextIndexKey;
            long nextRowPositionFromIndex = 0;
            try {
                nextIndexKey = indexFile.isEOF() ? null : ByteBufferUtil.readWithShortLength(indexFile);
                nextRowPositionFromIndex = indexFile.isEOF() ? dataFile.length() : rowIndexEntrySerializer.deserializePositionAndSkip(indexFile);
            } catch (Throwable th) {
                markAndThrow();
            }
            long dataStart = dataFile.getFilePointer();
            long dataStartFromIndex = currentIndexKey == null ? -1 : rowStart + 2 + currentIndexKey.remaining();
            long dataSize = nextRowPositionFromIndex - dataStartFromIndex;
            // avoid an NPE if key is null
            String keyName = key == null ? "(unreadable key)" : ByteBufferUtil.bytesToHex(key.getKey());
            outputHandler.debug(String.format("row %s is %s", keyName, FBUtilities.prettyPrintMemory(dataSize)));
            assert currentIndexKey != null || indexFile.isEOF();
            try {
                if (key == null || dataSize > dataFile.length())
                    markAndThrow();
                //mimic the scrub read path
                try (UnfilteredRowIterator iterator = SSTableIdentityIterator.create(sstable, dataFile, key)) {
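                    // constructing the iterator deserializes the partition header; any corruption there throws, while the rows themselves are left unconsumed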
                }
                if ((prevKey != null && prevKey.compareTo(key) > 0) || !key.getKey().equals(currentIndexKey) || dataStart != dataStartFromIndex)
                    markAndThrow();
                goodRows++;
                prevKey = key;
                outputHandler.debug(String.format("Row %s at %s valid, moving to next row at %s ", goodRows, rowStart, nextRowPositionFromIndex));
                dataFile.seek(nextRowPositionFromIndex);
            } catch (Throwable th) {
                badRows++;
                markAndThrow();
            }
        }
    } catch (Throwable t) {
        throw Throwables.propagate(t);
    } finally {
        controller.close();
    }
    outputHandler.output("Verify of " + sstable + " succeeded. All " + goodRows + " rows read successfully");
}
Also used : UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) IOException(java.io.IOException) File(java.io.File) ByteBuffer(java.nio.ByteBuffer)
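
The empty try-with-resources in the read-path check above validates only what iterator construction touches. A stricter variant (a sketch, not what this version of the verifier does) would drain the iterator so that every row and tombstone marker is deserialized as well:

try (UnfilteredRowIterator iterator = SSTableIdentityIterator.create(sstable, dataFile, key)) {
    while (iterator.hasNext())
        iterator.next(); // forces deserialization of each row/marker, surfacing corruption deeper in the partition
}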

Example 4 with UnfilteredRowIterator

use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.

the class CassandraIndexSearcher method search.

// Both the OpOrder and 'indexIter' are closed on exception, or through the closing of the result
// of this method.
@SuppressWarnings("resource")
public UnfilteredPartitionIterator search(ReadExecutionController executionController) {
    // the value of the index expression is the partition key in the index table
    DecoratedKey indexKey = index.getBackingTable().get().decorateKey(expression.getIndexValue());
    UnfilteredRowIterator indexIter = queryIndex(indexKey, command, executionController);
    try {
        return queryDataFromIndex(indexKey, UnfilteredRowIterators.filter(indexIter, command.nowInSec()), command, executionController);
    } catch (RuntimeException | Error e) {
        indexIter.close();
        throw e;
    }
}
Also used : UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator)
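
The close-on-exception shape here is a general ownership-transfer idiom: a method that returns an object built on top of a resource closes that resource only on the failure path, because on success the returned object assumes ownership and its own close() releases it. A minimal sketch of the idiom, with openInner and wrap as hypothetical stand-ins:

UnfilteredRowIterator inner = openInner(); // hypothetical: acquires the underlying resource
try {
    return wrap(inner); // hypothetical wrapper takes ownership; closing it closes inner
} catch (RuntimeException | Error e) {
    inner.close(); // still our responsibility on the failure path
    throw e;
}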

Example 5 with UnfilteredRowIterator

use of org.apache.cassandra.db.rows.UnfilteredRowIterator in project cassandra by apache.

the class LongLeveledCompactionStrategyTest method testLeveledScanner.

@Test
public void testLeveledScanner() throws Exception {
    Keyspace keyspace = Keyspace.open(KEYSPACE1);
    ColumnFamilyStore store = keyspace.getColumnFamilyStore(CF_STANDARDLVL2);
    // 100 KB value, make it easy to have multiple files
    ByteBuffer value = ByteBuffer.wrap(new byte[100 * 1024]);
    // Enough data to have a level 1 and 2
    int rows = 128;
    int columns = 10;
    // Adds enough data to trigger multiple sstable per level
    for (int r = 0; r < rows; r++) {
        DecoratedKey key = Util.dk(String.valueOf(r));
        UpdateBuilder builder = UpdateBuilder.create(store.metadata(), key);
        for (int c = 0; c < columns; c++) builder.newRow("column" + c).add("val", value);
        Mutation rm = new Mutation(builder.build());
        rm.apply();
        store.forceBlockingFlush();
    }
    LeveledCompactionStrategyTest.waitForLeveling(store);
    store.disableAutoCompaction();
    CompactionStrategyManager mgr = store.getCompactionStrategyManager();
    LeveledCompactionStrategy lcs = (LeveledCompactionStrategy) mgr.getStrategies().get(1).get(0);
    // 10 KB value
    value = ByteBuffer.wrap(new byte[10 * 1024]);
    // Adds 10 partitions
    for (int r = 0; r < 10; r++) {
        DecoratedKey key = Util.dk(String.valueOf(r));
        UpdateBuilder builder = UpdateBuilder.create(store.metadata(), key);
        for (int c = 0; c < 10; c++) builder.newRow("column" + c).add("val", value);
        Mutation rm = new Mutation(builder.build());
        rm.apply();
    }
    //Flush sstable
    store.forceBlockingFlush();
    store.runWithCompactionsDisabled(new Callable<Void>() {

        public Void call() throws Exception {
            Iterable<SSTableReader> allSSTables = store.getSSTables(SSTableSet.LIVE);
            for (SSTableReader sstable : allSSTables) {
                if (sstable.getSSTableLevel() == 0) {
                    System.out.println("Mutating L0-SSTABLE level to L1 to simulate a bug: " + sstable.getFilename());
                    sstable.descriptor.getMetadataSerializer().mutateLevel(sstable.descriptor, 1);
                    sstable.reloadSSTableMetadata();
                }
            }
            try (AbstractCompactionStrategy.ScannerList scannerList = lcs.getScanners(Lists.newArrayList(allSSTables))) {
                //Verify that leveled scanners will always iterate in ascending order (CASSANDRA-9935)
                for (ISSTableScanner scanner : scannerList.scanners) {
                    DecoratedKey lastKey = null;
                    while (scanner.hasNext()) {
                        UnfilteredRowIterator row = scanner.next();
                        if (lastKey != null) {
                            assertTrue("row " + row.partitionKey() + " received out of order wrt " + lastKey, row.partitionKey().compareTo(lastKey) >= 0);
                        }
                        lastKey = row.partitionKey();
                    }
                }
            }
            return null;
        }
    }, true, true);
}
Also used : ISSTableScanner(org.apache.cassandra.io.sstable.ISSTableScanner) UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) UpdateBuilder(org.apache.cassandra.UpdateBuilder) ByteBuffer(java.nio.ByteBuffer) ConfigurationException(org.apache.cassandra.exceptions.ConfigurationException) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) Test(org.junit.Test)

Aggregations

UnfilteredRowIterator (org.apache.cassandra.db.rows.UnfilteredRowIterator) - 19 usages
SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader) - 11 usages
Test (org.junit.Test) - 6 usages
File (java.io.File) - 5 usages
ISSTableScanner (org.apache.cassandra.io.sstable.ISSTableScanner) - 5 usages
UnfilteredPartitionIterator (org.apache.cassandra.db.partitions.UnfilteredPartitionIterator) - 4 usages
Row (org.apache.cassandra.db.rows.Row) - 4 usages
Range (org.apache.cassandra.dht.Range) - 4 usages
ByteBuffer (java.nio.ByteBuffer) - 3 usages
LifecycleTransaction (org.apache.cassandra.db.lifecycle.LifecycleTransaction) - 3 usages
IOException (java.io.IOException) - 2 usages
UUID (java.util.UUID) - 2 usages
ClusteringIndexSliceFilter (org.apache.cassandra.db.filter.ClusteringIndexSliceFilter) - 2 usages
AbstractRow (org.apache.cassandra.db.rows.AbstractRow) - 2 usages
Cell (org.apache.cassandra.db.rows.Cell) - 2 usages
Unfiltered (org.apache.cassandra.db.rows.Unfiltered) - 2 usages
BytesToken (org.apache.cassandra.dht.ByteOrderedPartitioner.BytesToken) - 2 usages
Token (org.apache.cassandra.dht.Token) - 2 usages
ConfigurationException (org.apache.cassandra.exceptions.ConfigurationException) - 2 usages
SSTableRewriter (org.apache.cassandra.io.sstable.SSTableRewriter) - 2 usages