
Example 1 with FileDataInput

use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.

the class CompactionController method getShadowIterator.

// caller to close
@SuppressWarnings("resource")
private UnfilteredRowIterator getShadowIterator(SSTableReader reader, DecoratedKey key, boolean tombstoneOnly) {
    if (reader.isMarkedSuspect() || reader.getMaxTimestamp() <= minTimestamp || tombstoneOnly && !reader.hasTombstones())
        return null;
    RowIndexEntry<?> position = reader.getPosition(key, SSTableReader.Operator.EQ);
    if (position == null)
        return null;
    FileDataInput dfile = openDataFiles.computeIfAbsent(reader, this::openDataFile);
    return reader.simpleIterator(dfile, key, position, tombstoneOnly);
}
Also used : FileDataInput(org.apache.cassandra.io.util.FileDataInput)
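
The iterator above borrows a per-reader data-file input that the controller caches and eventually closes. A minimal sketch of how that cache and its helper could be wired up is shown below; the openDataFiles field, openDataFile helper and close method are assumptions inferred from the call above, not the actual CompactionController members.

// Hypothetical sketch: keep one open FileDataInput per SSTableReader so repeated
// shadow lookups against the same sstable reuse the same reader.
private final Map<SSTableReader, FileDataInput> openDataFiles = new HashMap<>();

private FileDataInput openDataFile(SSTableReader reader) {
    // openDataReader() returns a RandomAccessReader, which implements FileDataInput
    return reader.openDataReader();
}

public void close() {
    // the controller, not the iterator's caller, is responsible for the cached inputs
    for (FileDataInput in : openDataFiles.values())
        FileUtils.closeQuietly(in);
    openDataFiles.clear();
}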

Example 2 with FileDataInput

use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.

the class BigTableReader method getPosition.

/**
     * @param key The key to apply as the rhs to the given Operator. A 'fake' key is allowed to
     * allow key selection by token bounds but only if op != EQ
     * @param op The Operator defining matching keys: the nearest key to the target matching the operator wins.
     * @param updateCacheAndStats true if updating stats and cache
     * @return The index entry corresponding to the key, or null if the key is not present
     */
protected RowIndexEntry getPosition(PartitionPosition key, Operator op, boolean updateCacheAndStats, boolean permitMatchPastLast) {
    if (op == Operator.EQ) {
        // EQ only makes sense if the key is a valid row key
        assert key instanceof DecoratedKey;
        if (!bf.isPresent((DecoratedKey) key)) {
            Tracing.trace("Bloom filter allows skipping sstable {}", descriptor.generation);
            return null;
        }
    }
    // next, the key cache (only makes sense for a valid row key)
    if ((op == Operator.EQ || op == Operator.GE) && (key instanceof DecoratedKey)) {
        DecoratedKey decoratedKey = (DecoratedKey) key;
        KeyCacheKey cacheKey = new KeyCacheKey(metadata(), descriptor, decoratedKey.getKey());
        RowIndexEntry cachedPosition = getCachedPosition(cacheKey, updateCacheAndStats);
        if (cachedPosition != null) {
            Tracing.trace("Key cache hit for sstable {}", descriptor.generation);
            return cachedPosition;
        }
    }
    // check the smallest and greatest keys in the sstable to see if it can't be present
    boolean skip = false;
    if (key.compareTo(first) < 0) {
        if (op == Operator.EQ)
            skip = true;
        else
            key = first;
        op = Operator.EQ;
    } else {
        int l = last.compareTo(key);
        // l <= 0  => we may be looking past the end of the file; we then narrow our behaviour to:
        //             1) skipping if strictly greater for GE and EQ;
        //             2) skipping if equal and searching GT, and we aren't permitting matching past last
        skip = l <= 0 && (l < 0 || (!permitMatchPastLast && op == Operator.GT));
    }
    if (skip) {
        if (op == Operator.EQ && updateCacheAndStats)
            bloomFilterTracker.addFalsePositive();
        Tracing.trace("Check against min and max keys allows skipping sstable {}", descriptor.generation);
        return null;
    }
    int binarySearchResult = indexSummary.binarySearch(key);
    long sampledPosition = getIndexScanPositionFromBinarySearchResult(binarySearchResult, indexSummary);
    int sampledIndex = getIndexSummaryIndexFromBinarySearchResult(binarySearchResult);
    int effectiveInterval = indexSummary.getEffectiveIndexIntervalAfterIndex(sampledIndex);
    if (ifile == null)
        return null;
    // scan the on-disk index, starting at the nearest sampled position.
    // The check against IndexInterval is there to exit the loop in the EQ case when the key looked for is not present
    // (bloom filter false positive). But note that for non-EQ cases, we might need to check the first key of the
    // next index position because the searched key can be greater than the last key of the index interval checked if it
    // is less than the first key of the next interval (and in that case we must return the position of the first key
    // of the next interval).
    int i = 0;
    String path = null;
    try (FileDataInput in = ifile.createReader(sampledPosition)) {
        path = in.getPath();
        while (!in.isEOF()) {
            i++;
            ByteBuffer indexKey = ByteBufferUtil.readWithShortLength(in);
            // did we find an appropriate position for the op requested
            boolean opSatisfied;
            // is the current position an exact match for the key, suitable for caching
            boolean exactMatch;
            // Compare raw keys if possible for performance, otherwise compare decorated keys.
            if (op == Operator.EQ && i <= effectiveInterval) {
                opSatisfied = exactMatch = indexKey.equals(((DecoratedKey) key).getKey());
            } else {
                DecoratedKey indexDecoratedKey = decorateKey(indexKey);
                int comparison = indexDecoratedKey.compareTo(key);
                int v = op.apply(comparison);
                opSatisfied = (v == 0);
                exactMatch = (comparison == 0);
                if (v < 0) {
                    Tracing.trace("Partition index lookup allows skipping sstable {}", descriptor.generation);
                    return null;
                }
            }
            if (opSatisfied) {
                // read data position from index entry
                RowIndexEntry indexEntry = rowIndexEntrySerializer.deserialize(in, in.getFilePointer());
                if (exactMatch && updateCacheAndStats) {
                    // key can be == to the index key only if it's a true row key
                    assert key instanceof DecoratedKey;
                    DecoratedKey decoratedKey = (DecoratedKey) key;
                    if (logger.isTraceEnabled()) {
                        // expensive sanity check!  see CASSANDRA-4687
                        try (FileDataInput fdi = dfile.createReader(indexEntry.position)) {
                            DecoratedKey keyInDisk = decorateKey(ByteBufferUtil.readWithShortLength(fdi));
                            if (!keyInDisk.equals(key))
                                throw new AssertionError(String.format("%s != %s in %s", keyInDisk, key, fdi.getPath()));
                        }
                    }
                    // store exact match for the key
                    cacheKey(decoratedKey, indexEntry);
                }
                if (op == Operator.EQ && updateCacheAndStats)
                    bloomFilterTracker.addTruePositive();
                Tracing.trace("Partition index with {} entries found for sstable {}", indexEntry.columnsIndexCount(), descriptor.generation);
                return indexEntry;
            }
            RowIndexEntry.Serializer.skip(in, descriptor.version);
        }
    } catch (IOException e) {
        markSuspect();
        throw new CorruptSSTableException(e, path);
    }
    if (op == SSTableReader.Operator.EQ && updateCacheAndStats)
        bloomFilterTracker.addFalsePositive();
    Tracing.trace("Partition index lookup complete (bloom filter false positive) for sstable {}", descriptor.generation);
    return null;
}
Also used : FileDataInput(org.apache.cassandra.io.util.FileDataInput) KeyCacheKey(org.apache.cassandra.cache.KeyCacheKey) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer)
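
Callers normally reach this lookup through the public SSTableReader API rather than invoking it directly. A minimal usage sketch, assuming an already-open SSTableReader named sstable and a DecoratedKey named dk (both hypothetical), follows the same pattern as the tests later on this page:

RowIndexEntry<?> entry = sstable.getPosition(dk, SSTableReader.Operator.EQ);
if (entry != null) {
    try (FileDataInput in = sstable.getFileDataInput(entry.position)) {
        // the partition key is stored (short length + bytes) at the start of the partition
        DecoratedKey onDisk = sstable.decorateKey(ByteBufferUtil.readWithShortLength(in));
        assert onDisk.equals(dk);
    } catch (IOException e) {
        throw new FSReadError(e, sstable.getFilename());
    }
}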

Example 3 with FileDataInput

use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.

the class SegmentReaderTest method compressedSegmenter.

private void compressedSegmenter(ICompressor compressor) throws IOException {
    int rawSize = (1 << 15) - 137;
    ByteBuffer plainTextBuffer = compressor.preferredBufferType().allocate(rawSize);
    byte[] b = new byte[rawSize];
    random.nextBytes(b);
    plainTextBuffer.put(b);
    plainTextBuffer.flip();
    // need to add in the plain text size to the block we write out
    int uncompressedHeaderSize = 4;
    int length = compressor.initialCompressedBufferLength(rawSize);
    ByteBuffer compBuffer = ByteBufferUtil.ensureCapacity(null, length + uncompressedHeaderSize, true, compressor.preferredBufferType());
    compBuffer.putInt(rawSize);
    compressor.compress(plainTextBuffer, compBuffer);
    compBuffer.flip();
    File compressedFile = File.createTempFile("compressed-segment-", ".log");
    compressedFile.deleteOnExit();
    FileOutputStream fos = new FileOutputStream(compressedFile);
    fos.getChannel().write(compBuffer);
    fos.close();
    try (RandomAccessReader reader = RandomAccessReader.open(compressedFile)) {
        CompressedSegmenter segmenter = new CompressedSegmenter(compressor, reader);
        int fileLength = (int) compressedFile.length();
        SyncSegment syncSegment = segmenter.nextSegment(0, fileLength);
        FileDataInput fileDataInput = syncSegment.input;
        ByteBuffer fileBuffer = readBytes(fileDataInput, rawSize);
        plainTextBuffer.flip();
        Assert.assertEquals(plainTextBuffer, fileBuffer);
        // CompressedSegmenter includes the Sync header length in the syncSegment.endPosition (value)
        Assert.assertEquals(rawSize, syncSegment.endPosition - CommitLogSegment.SYNC_MARKER_SIZE);
    }
}
Also used : FileDataInput(org.apache.cassandra.io.util.FileDataInput) RandomAccessReader(org.apache.cassandra.io.util.RandomAccessReader) FileOutputStream(java.io.FileOutputStream) ByteBuffer(java.nio.ByteBuffer) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) CompressedSegmenter(org.apache.cassandra.db.commitlog.CommitLogSegmentReader.CompressedSegmenter) SyncSegment(org.apache.cassandra.db.commitlog.CommitLogSegmentReader.SyncSegment)
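
The readBytes helper used above is defined elsewhere in SegmentReaderTest; a plausible minimal sketch of it (not the test's actual implementation) simply fills a byte array from the FileDataInput:

private static ByteBuffer readBytes(FileDataInput input, int length) throws IOException {
    byte[] bytes = new byte[length];
    // FileDataInput extends DataInput, so readFully is available
    input.readFully(bytes);
    return ByteBuffer.wrap(bytes);
}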

Example 4 with FileDataInput

use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.

the class SSTableFlushObserverTest method testFlushObserver.

@Test
public void testFlushObserver() {
    TableMetadata cfm = TableMetadata.builder(KS_NAME, CF_NAME).addPartitionKeyColumn("id", UTF8Type.instance).addRegularColumn("first_name", UTF8Type.instance).addRegularColumn("age", Int32Type.instance).addRegularColumn("height", LongType.instance).build();
    LifecycleTransaction transaction = LifecycleTransaction.offline(OperationType.COMPACTION);
    FlushObserver observer = new FlushObserver();
    String sstableDirectory = DatabaseDescriptor.getAllDataFileLocations()[0];
    File directory = new File(sstableDirectory + File.separator + KS_NAME + File.separator + CF_NAME);
    directory.deleteOnExit();
    if (!directory.exists() && !directory.mkdirs())
        throw new FSWriteError(new IOException("failed to create tmp directory"), directory.getAbsolutePath());
    SSTableFormat.Type sstableFormat = SSTableFormat.Type.current();
    BigTableWriter writer = new BigTableWriter(new Descriptor(sstableFormat.info.getLatestVersion(), directory, KS_NAME, CF_NAME, 0, sstableFormat), 10L, 0L, null, TableMetadataRef.forOfflineTools(cfm), new MetadataCollector(cfm.comparator).sstableLevel(0), new SerializationHeader(true, cfm, cfm.regularAndStaticColumns(), EncodingStats.NO_STATS), Collections.singletonList(observer), transaction);
    SSTableReader reader = null;
    Multimap<ByteBuffer, Cell> expected = ArrayListMultimap.create();
    try {
        final long now = System.currentTimeMillis();
        ByteBuffer key = UTF8Type.instance.fromString("key1");
        expected.putAll(key, Arrays.asList(BufferCell.live(getColumn(cfm, "age"), now, Int32Type.instance.decompose(27)), BufferCell.live(getColumn(cfm, "first_name"), now, UTF8Type.instance.fromString("jack")), BufferCell.live(getColumn(cfm, "height"), now, LongType.instance.decompose(183L))));
        writer.append(new RowIterator(cfm, key.duplicate(), Collections.singletonList(buildRow(expected.get(key)))));
        key = UTF8Type.instance.fromString("key2");
        expected.putAll(key, Arrays.asList(BufferCell.live(getColumn(cfm, "age"), now, Int32Type.instance.decompose(30)), BufferCell.live(getColumn(cfm, "first_name"), now, UTF8Type.instance.fromString("jim")), BufferCell.live(getColumn(cfm, "height"), now, LongType.instance.decompose(180L))));
        writer.append(new RowIterator(cfm, key, Collections.singletonList(buildRow(expected.get(key)))));
        key = UTF8Type.instance.fromString("key3");
        expected.putAll(key, Arrays.asList(BufferCell.live(getColumn(cfm, "age"), now, Int32Type.instance.decompose(30)), BufferCell.live(getColumn(cfm, "first_name"), now, UTF8Type.instance.fromString("ken")), BufferCell.live(getColumn(cfm, "height"), now, LongType.instance.decompose(178L))));
        writer.append(new RowIterator(cfm, key, Collections.singletonList(buildRow(expected.get(key)))));
        reader = writer.finish(true);
    } finally {
        FileUtils.closeQuietly(writer);
    }
    Assert.assertTrue(observer.isComplete);
    Assert.assertEquals(expected.size(), observer.rows.size());
    for (Pair<ByteBuffer, Long> e : observer.rows.keySet()) {
        ByteBuffer key = e.left;
        Long indexPosition = e.right;
        try (FileDataInput index = reader.ifile.createReader(indexPosition)) {
            ByteBuffer indexKey = ByteBufferUtil.readWithShortLength(index);
            Assert.assertEquals(0, UTF8Type.instance.compare(key, indexKey));
        } catch (IOException ex) {
            throw new FSReadError(ex, reader.getIndexFilename());
        }
        Assert.assertEquals(expected.get(key), observer.rows.get(e));
    }
}
Also used : TableMetadata(org.apache.cassandra.schema.TableMetadata) FSWriteError(org.apache.cassandra.io.FSWriteError) LifecycleTransaction(org.apache.cassandra.db.lifecycle.LifecycleTransaction) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) FileDataInput(org.apache.cassandra.io.util.FileDataInput) SerializationHeader(org.apache.cassandra.db.SerializationHeader) FSReadError(org.apache.cassandra.io.FSReadError) BigTableWriter(org.apache.cassandra.io.sstable.format.big.BigTableWriter) Descriptor(org.apache.cassandra.io.sstable.Descriptor) DatabaseDescriptor(org.apache.cassandra.config.DatabaseDescriptor) MetadataCollector(org.apache.cassandra.io.sstable.metadata.MetadataCollector) File(java.io.File) Test(org.junit.Test)
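
The test above relies on a few small helpers (getColumn, buildRow, RowIterator, FlushObserver) defined in the same test class. As one illustration, here is a hedged sketch of what the getColumn lookup might look like, assuming TableMetadata.getColumn(ByteBuffer) resolves a column by its raw name bytes:

// Hypothetical sketch of the getColumn(TableMetadata, String) helper used above:
private static ColumnMetadata getColumn(TableMetadata metadata, String name) {
    // columns were declared via addRegularColumn(String, ...), so UTF-8 name bytes match
    return metadata.getColumn(UTF8Type.instance.fromString(name));
}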

Example 5 with FileDataInput

use of org.apache.cassandra.io.util.FileDataInput in project cassandra by apache.

the class SSTableReaderTest method testSpannedIndexPositions.

@Test
public void testSpannedIndexPositions() throws IOException {
    int originalMaxSegmentSize = MmappedRegions.MAX_SEGMENT_SIZE;
    // each index entry is ~11 bytes, so this will generate lots of segments
    MmappedRegions.MAX_SEGMENT_SIZE = 40;
    try {
        Keyspace keyspace = Keyspace.open(KEYSPACE1);
        ColumnFamilyStore store = keyspace.getColumnFamilyStore("Standard1");
        partitioner = store.getPartitioner();
        // insert a bunch of data and compact to a single sstable
        CompactionManager.instance.disableAutoCompaction();
        for (int j = 0; j < 100; j += 2) {
            new RowUpdateBuilder(store.metadata(), j, String.valueOf(j)).clustering("0").add("val", ByteBufferUtil.EMPTY_BYTE_BUFFER).build().applyUnsafe();
        }
        store.forceBlockingFlush();
        CompactionManager.instance.performMaximal(store, false);
        // check that all our keys are found correctly
        SSTableReader sstable = store.getLiveSSTables().iterator().next();
        for (int j = 0; j < 100; j += 2) {
            DecoratedKey dk = Util.dk(String.valueOf(j));
            FileDataInput file = sstable.getFileDataInput(sstable.getPosition(dk, SSTableReader.Operator.EQ).position);
            DecoratedKey keyInDisk = sstable.decorateKey(ByteBufferUtil.readWithShortLength(file));
            assert keyInDisk.equals(dk) : String.format("%s != %s in %s", keyInDisk, dk, file.getPath());
        }
        // check no false positives
        for (int j = 1; j < 110; j += 2) {
            DecoratedKey dk = Util.dk(String.valueOf(j));
            assert sstable.getPosition(dk, SSTableReader.Operator.EQ) == null;
        }
    } finally {
        MmappedRegions.MAX_SEGMENT_SIZE = originalMaxSegmentSize;
    }
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) FileDataInput(org.apache.cassandra.io.util.FileDataInput) Test(org.junit.Test)
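
Util.dk is a test helper that turns a string into a DecoratedKey for the active partitioner. A hedged sketch of what it might do (the real helper lives in the test Util class):

// Hypothetical sketch of Util.dk(String):
static DecoratedKey dk(String key) {
    return DatabaseDescriptor.getPartitioner().decorateKey(ByteBufferUtil.bytes(key));
}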

Aggregations

FileDataInput (org.apache.cassandra.io.util.FileDataInput) 8
ByteBuffer (java.nio.ByteBuffer) 6
Test (org.junit.Test) 3
File (java.io.File) 2
IOException (java.io.IOException) 2
FileOutputStream (java.io.FileOutputStream) 1
RandomAccessFile (java.io.RandomAccessFile) 1
NavigableSet (java.util.NavigableSet) 1
SortedSet (java.util.SortedSet) 1
TreeSet (java.util.TreeSet) 1
KeyCacheKey (org.apache.cassandra.cache.KeyCacheKey) 1
DatabaseDescriptor (org.apache.cassandra.config.DatabaseDescriptor) 1
EVTComparator (org.apache.cassandra.db.Column.EVTComparator) 1
SerializationHeader (org.apache.cassandra.db.SerializationHeader) 1
CompressedSegmenter (org.apache.cassandra.db.commitlog.CommitLogSegmentReader.CompressedSegmenter) 1
SyncSegment (org.apache.cassandra.db.commitlog.CommitLogSegmentReader.SyncSegment) 1
QueryPath (org.apache.cassandra.db.filter.QueryPath) 1
LifecycleTransaction (org.apache.cassandra.db.lifecycle.LifecycleTransaction) 1
PendingTransactionColumn (org.apache.cassandra.db.transaction.PendingTransactionColumn) 1
FSReadError (org.apache.cassandra.io.FSReadError) 1