Example 1 with CacheKey

the class StreamSegmentReadIndex method readDirect.

the class StreamSegmentReadIndex method readDirect.

 * Reads a contiguous sequence of bytes of the given length starting at the given offset. Every byte in the range
 * must meet the following conditions:
 * <ul>
 * <li> It must exist in this segment. This excludes bytes from merged transactions and future reads.
 * <li> It must be part of data that is not yet committed to Storage (tail part) - as such, it must be fully in the cache.
 * </ul>
 * Note: This method will not cause cache statistics to be updated. As such, Cache entry generations will not be
 * updated for those entries that are touched.
 * @param startOffset The offset in the StreamSegment where to start reading.
 * @param length      The number of bytes to read.
 * @return An InputStream containing the requested data, or null if all of the conditions of this read cannot be met.
 * @throws IllegalStateException    If the read index is in recovery mode.
 * @throws IllegalArgumentException If the parameters are invalid (offset, length or offset+length are not in the Segment's range).
InputStream readDirect(long startOffset, int length) {
    // Validate the object state and the requested range before touching the index.
    Exceptions.checkNotClosed(this.closed, this);
    Preconditions.checkState(!this.recoveryMode, "StreamSegmentReadIndex is in Recovery Mode.");
    Preconditions.checkArgument(length >= 0, "length must be a non-negative number");
    Preconditions.checkArgument(startOffset >= this.metadata.getStorageLength(), "startOffset must refer to an offset beyond the Segment's StorageLength offset.");
    // The upper-bound check is inclusive (<=): a read that ends exactly at the Segment's length is accepted.
    Preconditions.checkArgument(startOffset + length <= this.metadata.getLength(), "startOffset+length must be less than the length of the Segment.");
    Preconditions.checkArgument(startOffset >= Math.min(this.metadata.getStartOffset(), this.metadata.getStorageLength()), "startOffset is before the Segment's StartOffset.");
    // Get the first entry. This one is trickier because the requested start offset may not fall on an entry boundary.
    CompletableReadResultEntry nextEntry;
    synchronized (this.lock) {
        // Locate the index entry with the highest start offset that does not exceed startOffset.
        ReadIndexEntry indexEntry = this.indexEntries.getFloor(startOffset);
        if (indexEntry == null || startOffset > indexEntry.getLastStreamSegmentOffset() || !indexEntry.isDataEntry()) {
            // Data is not available, or the data exists in a partially merged transaction.
            return null;
        } else {
            // Fetch data from the cache for the first entry, but do not update the cache hit stats.
            nextEntry = createMemoryRead(indexEntry, startOffset, length, false);
    // Since we know all entries should be in the cache and are contiguous, there is no need
    // to search the index for each subsequent entry; they are fetched straight from the cache below.
    assert Futures.isSuccessful(nextEntry.getContent()) : "Found CacheReadResultEntry that is not completed yet: " + nextEntry;
    val entryContents = nextEntry.getContent().join();
    ArrayList<InputStream> contents = new ArrayList<>();
    // NOTE(review): in the visible code entryContents only contributes its length; its data is never added to
    // 'contents'. Confirm that the first entry's data is included in the returned stream.
    int readLength = entryContents.getLength();
    // readLength counts the bytes collected so far; keep reading until the requested length is covered.
    while (readLength < length) {
        // No need to search the index; from now on, we know each offset we are looking for is at the beginning of a cache entry.
        // Also, no need to acquire the lock there. The cache itself is thread safe, and if the entry we are about to fetch
        // has just been evicted, we'll just get null back and stop reading (which is acceptable).
        byte[] entryData = this.cache.get(new CacheKey(this.metadata.getId(), startOffset + readLength));
        if (entryData == null) {
            // Could not find the 'next' cache entry: this means the requested range is not fully cached.
            return null;
        // Take only as many bytes from this entry as are still needed to satisfy the request.
        int entryReadLength = Math.min(entryData.length, length - readLength);
        assert entryReadLength > 0 : "about to have fetched zero bytes from a cache entry";
        contents.add(new ByteArrayInputStream(entryData, 0, entryReadLength));
        readLength += entryReadLength;
    // Coalesce the results into a single InputStream and return the result.
    return new SequenceInputStream(Iterators.asEnumeration(contents.iterator()));
Also used : lombok.val(lombok.val) SequenceInputStream( ByteArrayInputStream( ByteArrayInputStream( SequenceInputStream( InputStream( ArrayList(java.util.ArrayList) CacheKey(io.pravega.segmentstore.server.CacheKey)

Example 2 with CacheKey

the class StreamSegmentReadIndex method append.

the class StreamSegmentReadIndex method append.

// endregion
// region Index Updates
 * Appends the given range of bytes at the given offset.
 * @param offset The offset within the StreamSegment to append at.
 * @param data   The range of bytes to append.
 * @throws NullPointerException     If data is null.
 * @throws IllegalArgumentException If the operation would cause writing beyond the StreamSegment's Length.
 * @throws IllegalArgumentException If the offset is invalid (does not match the previous append offset).
void append(long offset, byte[] data) {
    // Validate the object state before doing any work.
    Exceptions.checkNotClosed(this.closed, this);
    Preconditions.checkState(!isMerged(), "StreamSegment has been merged into a different one. Cannot append more ReadIndex entries.");
    if (data.length == 0) {
        // Nothing to do. Adding empty read entries will only make our system slower and harder to debug.
        // NOTE(review): no return statement is visible in this branch; confirm that empty appends exit here.
    // Metadata check can be done outside the write lock.
    // Adding at the end means that we always need to "catch-up" with Length. Check to see if adding
    // this entry will make us catch up to it or not.
    long length = this.metadata.getLength();
    long endOffset = offset + data.length;
    // The appended range may end exactly at the metadata Length, but must not extend past it.
    Exceptions.checkArgument(endOffset <= length, "offset", "The given range of bytes (%d-%d) is beyond the StreamSegment Length (%d).", offset, endOffset, length);
    // Then append an entry for it in the ReadIndex. It's ok to insert into the cache outside of the lock here,
    // since there is no chance of competing with another write request for the same offset at the same time.
    this.cache.insert(new CacheKey(this.metadata.getId(), offset), data);
    appendEntry(new CacheIndexEntry(offset, data.length));
Also used : CacheKey(io.pravega.segmentstore.server.CacheKey)

Example 3 with CacheKey

the class ContainerReadIndexTests method testTruncate.

the class ContainerReadIndexTests method testTruncate.

 * Tests a scenario of truncation that does not happen concurrently with reading (segments are pre-truncated).
public void testTruncate() throws Exception {
    // We use a custom ReadIndexConfig that allows more than one generation. This helps us verify that truncated entries
    // are actually evicted.
    val config = ReadIndexConfig.builder().with(ReadIndexConfig.MEMORY_READ_MIN_LENGTH, DEFAULT_CONFIG.getMemoryReadMinLength()).with(ReadIndexConfig.STORAGE_READ_ALIGNMENT, DEFAULT_CONFIG.getStorageReadAlignment()).with(ReadIndexConfig.CACHE_POLICY_MAX_SIZE, Long.MAX_VALUE).with(ReadIndexConfig.CACHE_POLICY_MAX_TIME, 1000000).with(ReadIndexConfig.CACHE_POLICY_GENERATION_TIME, 10000).build();
    @Cleanup TestContext context = new TestContext(config, config.getCachePolicy());
    ArrayList<Long> segmentIds = createSegments(context);
    HashMap<Long, ByteArrayOutputStream> segmentContents = new HashMap<>();
    appendData(segmentIds, segmentContents, context);
    // Truncate all segments at their mid-points.
    for (int i = 0; i < segmentIds.size(); i++) {
        val sm = context.metadata.getStreamSegmentMetadata(segmentIds.get(i));
        sm.setStartOffset(sm.getLength() / 2);
        if (i % 2 == 0) {
        } else {
            sm.setStorageLength(sm.getStartOffset() / 2);
    // Check all the appended data. This includes verifying access to already truncated offsets.
    checkReadIndex("PostTruncate", segmentContents, context);
    checkReadIndexDirect(segmentContents, context);
    // Verify that truncated data is eligible for eviction, by checking that at least one Cache Entry is being removed.
    for (long segmentId : segmentIds) {
        val sm = context.metadata.getStreamSegmentMetadata(segmentId);
        // We need to set this in order to verify cache evictions.
    HashSet<CacheKey> removedKeys = new HashSet<>();
    context.cacheFactory.cache.removeCallback = removedKeys::add;
    AssertExtensions.assertGreaterThan("Expected at least one cache entry to be removed.", 0, removedKeys.size());
Also used : lombok.val(lombok.val) HashMap(java.util.HashMap) ByteArrayOutputStream( Cleanup(lombok.Cleanup) AtomicLong(java.util.concurrent.atomic.AtomicLong) CacheKey(io.pravega.segmentstore.server.CacheKey) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with CacheKey

the class ContainerReadIndexTests method testCacheEviction.

the class ContainerReadIndexTests method testCacheEviction.

 * Tests the ability to evict entries from the ReadIndex under various conditions:
 * * If an entry is aged out
 * * If an entry is pushed out because of cache space pressure.
 * <p>
 * This also verifies that certain entries, such as RedirectReadIndexEntries and entries after the Storage Offset are
 * not removed.
 * <p>
 * The way this test goes is as follows (it's pretty subtle, because there aren't many ways to hook into the ReadIndex and see what it's doing)
 * 1. It creates a bunch of segments, and populates them in storage (each) up to offset N/2-1 (this is called pre-storage)
 * 2. It populates the ReadIndex for each of those segments from offset N/2 to offset N-1 (this is called post-storage)
 * 3. It loads all the data from Storage into the ReadIndex, in entries of size equal to those already loaded in step #2.
 * 3a. At this point, all the entries added in step #2 have Generations 0..A/4-1, and step #3 have generations A/4..A-1
 * 4. Append more data at the end. This forces the generation to increase to 1.25A.
 * 4a. Nothing should be evicted from the cache now, since the earliest items are all post-storage.
 * 5. We 'touch' (read) the first 1/3 of pre-storage entries (offsets 0..N/4).
 * 5a. At this point, those entries (offsets 0..N/4) will have the newest generations (1.25A..1.5A)
 * 6. We append more data (equivalent to the data we touched)
 * 6a. Nothing should be evicted, since those generations that were just eligible for removal were touched and bumped up.
 * 7. We forcefully increase the current generation by 1 (without touching the ReadIndex)
 * 7a. At this point, we expect all the pre-storage items, except the touched ones, to be evicted. This is generations 0.25A-0.75A.
 * 8. Update the metadata and indicate that all the post-storage entries are now pre-storage and bump the generation by 0.75A.
 * 8a. At this point, we expect all former post-storage items and pre-storage items to be evicted (in this order).
 * <p>
 * The final order of eviction (in terms of offsets, for each segment), is:
 * * 0.25N-0.75N, 0.75N..N, N..1.25N, 0..0.25N, 1.25N..1.5N (remember that we added quite a bunch of items after the initial run).
public void testCacheEviction() throws Exception {
    // Create a CachePolicy with a set number of generations and a known max size.
    // Each generation contains exactly one entry, so the number of generations is also the number of entries.
    final int appendSize = 100;
    // This also doubles as number of generations (each generation, we add one append for each segment).
    final int entriesPerSegment = 100;
    final int cacheMaxSize = SEGMENT_COUNT * entriesPerSegment * appendSize;
    // 25% of the entries are beyond the StorageOffset
    final int postStorageEntryCount = entriesPerSegment / 4;
    // 75% of the entries are before the StorageOffset.
    final int preStorageEntryCount = entriesPerSegment - postStorageEntryCount;
    CachePolicy cachePolicy = new CachePolicy(cacheMaxSize, Duration.ofMillis(1000 * 2 * entriesPerSegment), Duration.ofMillis(1000));
    // To properly test this, we want predictable storage reads.
    ReadIndexConfig config = ConfigHelpers.withInfiniteCachePolicy(ReadIndexConfig.builder().with(ReadIndexConfig.STORAGE_READ_ALIGNMENT, appendSize)).build();
    ArrayList<CacheKey> removedKeys = new ArrayList<>();
    @Cleanup TestContext context = new TestContext(config, cachePolicy);
    // Record every cache removal.
    context.cacheFactory.cache.removeCallback = removedKeys::add;
    // Create the segments (metadata + storage).
    ArrayList<Long> segmentIds = createSegments(context);
    // Populate the Storage with appropriate data.
    byte[] preStorageData = new byte[preStorageEntryCount * appendSize];
    for (long segmentId : segmentIds) {
        UpdateableSegmentMetadata sm = context.metadata.getStreamSegmentMetadata(segmentId);
        val handle =;, 0, new ByteArrayInputStream(preStorageData), preStorageData.length, TIMEOUT).join();
    // Callback that appends one entry at the end of the given segment id.
    Consumer<Long> appendOneEntry = segmentId -> {
        UpdateableSegmentMetadata sm = context.metadata.getStreamSegmentMetadata(segmentId);
        byte[] data = new byte[appendSize];
        long offset = sm.getLength();
        sm.setLength(offset + data.length);
        try {
            context.readIndex.append(segmentId, offset, data);
        } catch (StreamSegmentNotExistsException ex) {
            throw new CompletionException(ex);
    // Populate the ReadIndex with the Append entries (post-StorageOffset)
    for (int i = 0; i < postStorageEntryCount; i++) {
        // Each time we make a round of appends (one per segment), we increment the generation in the CacheManager.
    // Read all the data from Storage, making sure we carefully associate them with the proper generation.
    for (int i = 0; i < preStorageEntryCount; i++) {
        long offset = i * appendSize;
        for (long segmentId : segmentIds) {
            @Cleanup ReadResult result =, offset, appendSize, TIMEOUT);
            ReadResultEntry resultEntry =;
            Assert.assertEquals("Unexpected type of ReadResultEntry when trying to load up data into the ReadIndex Cache.", ReadResultEntryType.Storage, resultEntry.getType());
            ReadResultEntryContents contents = resultEntry.getContent().get(TIMEOUT.toMillis(), TimeUnit.MILLISECONDS);
            Assert.assertFalse("Not expecting more data to be available for reading.", result.hasNext());
            Assert.assertEquals("Unexpected ReadResultEntry length when trying to load up data into the ReadIndex Cache.", appendSize, contents.getLength());
    Assert.assertEquals("Not expecting any removed Cache entries at this point (cache is not full).", 0, removedKeys.size());
    // Append more data (equivalent to all post-storage entries), and verify that NO entries are being evicted (we cannot evict post-storage entries).
    for (int i = 0; i < postStorageEntryCount; i++) {
    Assert.assertEquals("Not expecting any removed Cache entries at this point (only eligible entries were post-storage).", 0, removedKeys.size());
    // 'Touch' the first few entries read from storage. This should move them to the back of the queue (they won't be the first ones to be evicted).
    int touchCount = preStorageEntryCount / 3;
    for (int i = 0; i < touchCount; i++) {
        long offset = i * appendSize;
        for (long segmentId : segmentIds) {
            @Cleanup ReadResult result =, offset, appendSize, TIMEOUT);
            ReadResultEntry resultEntry =;
            Assert.assertEquals("Unexpected type of ReadResultEntry when trying to load up data into the ReadIndex Cache.", ReadResultEntryType.Cache, resultEntry.getType());
    // Append more data (equivalent to the amount of data we 'touched'), and verify that the entries we just touched are not being removed.
    for (int i = 0; i < touchCount; i++) {
    Assert.assertEquals("Not expecting any removed Cache entries at this point (we touched old entries and they now have the newest generation).", 0, removedKeys.size());
    // Increment the generations so that we are caught up to just before the generation where the "touched" items now live.
    // We expect all but the 'touchCount' pre-Storage entries to be removed.
    int expectedRemovalCount = (preStorageEntryCount - touchCount) * SEGMENT_COUNT;
    Assert.assertEquals("Unexpected number of removed entries after having forced out all pre-storage entries.", expectedRemovalCount, removedKeys.size());
    // Now update the metadata and indicate that all the post-storage data has been moved to storage.
    segmentIds.forEach(segmentId -> {
        UpdateableSegmentMetadata sm = context.metadata.getStreamSegmentMetadata(segmentId);
    // We add one artificial entry, which we'll be touching forever and ever; this forces the CacheManager to
    // update its current generation every time. We will be ignoring this entry for our test.
    SegmentMetadata readSegment = context.metadata.getStreamSegmentMetadata(segmentIds.get(0));
    // Now evict everything (whether by size or by aging out).
    for (int i = 0; i < cachePolicy.getMaxGenerations(); i++) {
        @Cleanup ReadResult result =, readSegment.getLength() - appendSize, appendSize, TIMEOUT);;
    int expectedRemovalCountPerSegment = entriesPerSegment + touchCount + postStorageEntryCount;
    int expectedTotalRemovalCount = SEGMENT_COUNT * expectedRemovalCountPerSegment;
    Assert.assertEquals("Unexpected number of removed entries after having forced out all the entries.", expectedTotalRemovalCount, removedKeys.size());
    // Finally, verify that the evicted items are in the correct order (for each segment). See this test's description for details.
    for (long segmentId : segmentIds) {
        List<CacheKey> segmentRemovedKeys = -> key.getStreamSegmentId() == segmentId).collect(Collectors.toList());
        Assert.assertEquals("Unexpected number of removed entries for segment " + segmentId, expectedRemovalCountPerSegment, segmentRemovedKeys.size());
        // The correct order of eviction (N=entriesPerSegment) is: 0.25N-0.75N, 0.75N..N, N..1.25N, 0..0.25N, 1.25N..1.5N.
        // This is equivalent to the following tests
        // 0.25N-1.25N
        checkOffsets(segmentRemovedKeys, segmentId, 0, entriesPerSegment, entriesPerSegment * appendSize / 4, appendSize);
        // 0..0.25N
        checkOffsets(segmentRemovedKeys, segmentId, entriesPerSegment, entriesPerSegment / 4, 0, appendSize);
        // 1.25N..1.5N
        checkOffsets(segmentRemovedKeys, segmentId, entriesPerSegment + entriesPerSegment / 4, entriesPerSegment / 4, (int) (entriesPerSegment * appendSize * 1.25), appendSize);
Also used : Storage( StreamSegmentNotExistsException(io.pravega.segmentstore.contracts.StreamSegmentNotExistsException) AssertExtensions(io.pravega.test.common.AssertExtensions) CacheKey(io.pravega.segmentstore.server.CacheKey) Cleanup(lombok.Cleanup) Random(java.util.Random) InMemoryCache( UpdateableSegmentMetadata(io.pravega.segmentstore.server.UpdateableSegmentMetadata) StreamSegmentSealedException(io.pravega.segmentstore.contracts.StreamSegmentSealedException) ByteArrayInputStream( AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ReadResultEntryContents(io.pravega.segmentstore.contracts.ReadResultEntryContents) InMemoryStorageFactory( Duration(java.time.Duration) Map(java.util.Map) CacheFactory( Collection(java.util.Collection) CompletionException(java.util.concurrent.CompletionException) ReadResultEntryType(io.pravega.segmentstore.contracts.ReadResultEntryType) UUID(java.util.UUID) Collectors( StreamSegmentNameUtils(io.pravega.shared.segment.StreamSegmentNameUtils) UncheckedIOException( List(java.util.List) ThreadPooledTestSuite(io.pravega.test.common.ThreadPooledTestSuite) Futures(io.pravega.common.concurrent.Futures) ReadResult(io.pravega.segmentstore.contracts.ReadResult) MetadataBuilder(io.pravega.segmentstore.server.MetadataBuilder) ByteArrayOutputStream( ConfigHelpers(io.pravega.segmentstore.server.ConfigHelpers) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) UpdateableContainerMetadata(io.pravega.segmentstore.server.UpdateableContainerMetadata) SegmentMetadata(io.pravega.segmentstore.server.SegmentMetadata) ReadResultEntry(io.pravega.segmentstore.contracts.ReadResultEntry) BiConsumer(java.util.function.BiConsumer) Timeout(org.junit.rules.Timeout) StreamHelpers( StreamSegmentTruncatedException(io.pravega.segmentstore.contracts.StreamSegmentTruncatedException) lombok.val(lombok.val) IOException( Test(org.junit.Test) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) 
AtomicLong(java.util.concurrent.atomic.AtomicLong) Rule(org.junit.Rule) Assert(org.junit.Assert) Collections(java.util.Collections) Cache( InputStream( ArrayList(java.util.ArrayList) ReadResult(io.pravega.segmentstore.contracts.ReadResult) Cleanup(lombok.Cleanup) CacheKey(io.pravega.segmentstore.server.CacheKey) lombok.val(lombok.val) UpdateableSegmentMetadata(io.pravega.segmentstore.server.UpdateableSegmentMetadata) ReadResultEntryContents(io.pravega.segmentstore.contracts.ReadResultEntryContents) StreamSegmentNotExistsException(io.pravega.segmentstore.contracts.StreamSegmentNotExistsException) UpdateableSegmentMetadata(io.pravega.segmentstore.server.UpdateableSegmentMetadata) SegmentMetadata(io.pravega.segmentstore.server.SegmentMetadata) ByteArrayInputStream( CompletionException(java.util.concurrent.CompletionException) ReadResultEntry(io.pravega.segmentstore.contracts.ReadResultEntry) AtomicLong(java.util.concurrent.atomic.AtomicLong) Test(org.junit.Test)

Example 5 with CacheKey

the class ContainerReadIndexTests method checkOffsets.

the class ContainerReadIndexTests method checkOffsets.

// endregion
// region Helpers
/**
 * Asserts that a run of consecutive keys in the given list has offsets forming an arithmetic progression.
 * The key at index startIndex + i is expected to have offset startOffset + i * stepIncrease.
 *
 * @param removedKeys  The list of CacheKeys to inspect.
 * @param segmentId    The Segment Id; used only in the assertion failure message.
 * @param startIndex   The index within removedKeys of the first key to check.
 * @param count        The number of consecutive keys to check.
 * @param startOffset  The expected offset of the first checked key.
 * @param stepIncrease The expected difference between the offsets of two consecutive keys.
 */
private void checkOffsets(List<CacheKey> removedKeys, long segmentId, int startIndex, int count, int startOffset, int stepIncrease) {
    int expectedStartOffset = startOffset;
    for (int i = 0; i < count; i++) {
        // listIndex is used for the failure message; the lookup below uses the same index.
        int listIndex = startIndex + i;
        CacheKey currentKey = removedKeys.get(startIndex + i);
        Assert.assertEquals(String.format("Unexpected CacheKey.SegmentOffset at index %d for SegmentId %d.", listIndex, segmentId), expectedStartOffset, currentKey.getOffset());
        // Advance the expected offset for the next key.
        expectedStartOffset += stepIncrease;
Also used : CacheKey(io.pravega.segmentstore.server.CacheKey)


