use of io.pravega.segmentstore.contracts.ReadResultEntry in project pravega by pravega.
the class StreamSegmentReadIndex method triggerFutureReads.
// endregion
// region Reading
/**
 * Triggers the pending Future Reads that have become serviceable.
 * <ul>
 * <li>If the segment is sealed, all pending Future Reads are triggered, regardless of whether the index has any
 * entries; those that point beyond the end of the segment are failed with a {@link StreamSegmentSealedException}.
 * <li>Otherwise, only the Future Reads up to the last offset of the last index entry are triggered. If the index
 * has no entries, nothing is done.
 * </ul>
 *
 * @throws IllegalStateException If the read index is in recovery mode.
 */
void triggerFutureReads() {
    Exceptions.checkNotClosed(this.closed, this);
    Preconditions.checkState(!this.recoveryMode, "StreamSegmentReadIndex is in Recovery Mode.");

    // Since we are not actually using this entry's data, there is no need to 'touch' it.
    ReadIndexEntry lastEntry;
    synchronized (this.lock) {
        lastEntry = this.indexEntries.getLast();
    }

    boolean sealed = this.metadata.isSealed();
    if (lastEntry == null && !sealed) {
        // Nothing was indexed and the segment is still open: no Future Read can be served yet.
        // A sealed segment must fall through even with an empty index, otherwise its pending Future Reads
        // would never be released.
        return;
    }

    // -1 is logged when the index is empty (only reachable for a sealed segment).
    long lastOffset = lastEntry == null ? -1 : lastEntry.getLastStreamSegmentOffset();
    Collection<FutureReadResultEntry> futureReads;
    if (sealed) {
        // Get everything, even if some Future Reads are in the future - those will eventually return EndOfSegment.
        futureReads = this.futureReads.pollAll();
    } else {
        // Get only those up to the last offset of the last append.
        futureReads = this.futureReads.poll(lastOffset);
    }

    log.debug("{}: triggerFutureReads (Count = {}, Offset = {}, Sealed = {}).", this.traceObjectId, futureReads.size(), lastOffset, sealed);
    for (FutureReadResultEntry r : futureReads) {
        // Re-issue the read now that the index/metadata has changed, and relay its outcome to the waiting Future Read.
        ReadResultEntry entry = getSingleReadResultEntry(r.getStreamSegmentOffset(), r.getRequestedReadLength());
        assert entry != null : "Serving a StorageReadResultEntry with a null result";
        assert !(entry instanceof FutureReadResultEntry) : "Serving a FutureReadResultEntry with another FutureReadResultEntry.";
        log.trace("{}: triggerFutureReads (Offset = {}, Type = {}).", this.traceObjectId, r.getStreamSegmentOffset(), entry.getType());
        if (entry.getType() == ReadResultEntryType.EndOfStreamSegment) {
            // We have attempted to read beyond the end of the stream. Fail the read request with the appropriate message.
            r.fail(new StreamSegmentSealedException(String.format("StreamSegment has been sealed at offset %d. There can be no more reads beyond this offset.", this.metadata.getLength())));
        } else {
            if (!entry.getContent().isDone()) {
                // Normally, all Future Reads are served from Cache, since they reflect data that has just been appended.
                // However, it's possible that after recovery, we get a read for some data that we do not have in the
                // cache (but it's not a tail read) - this data exists in Storage but our StorageLength has not yet been
                // updated. As such, the only solution we have is to return a FutureRead which will be satisfied when
                // the Writer updates the StorageLength (and trigger future reads). In that scenario, entry we get
                // will likely not be auto-fetched, so we need to request the content.
                entry.requestContent(this.config.getStorageReadDefaultTimeout());
            }

            // Complete the Future Read on success, fail it on error.
            CompletableFuture<ReadResultEntryContents> entryContent = entry.getContent();
            entryContent.thenAccept(r::complete);
            Futures.exceptionListener(entryContent, r::fail);
        }
    }
}
use of io.pravega.segmentstore.contracts.ReadResultEntry in project pravega by pravega.
the class PravegaRequestProcessor method collectCachedEntries.
/**
 * Drains the leading run of Cache entries from the given ReadResult, appending the contents of each one to
 * {@code cachedEntries} in the order they are returned.
 *
 * @param initialOffset The segment offset the first Cache entry is expected to start at.
 * @param readResult    The ReadResult to iterate over.
 * @param cachedEntries The list that receives the contents of every Cache entry encountered.
 * @return The first entry that is not of type Cache (iteration stops there), or null if the ReadResult was
 * exhausted without encountering one.
 * @throws IllegalStateException If a Cache entry does not begin where the previous one ended.
 */
private ReadResultEntry collectCachedEntries(long initialOffset, ReadResult readResult, ArrayList<ReadResultEntryContents> cachedEntries) {
    long nextOffset = initialOffset;
    while (readResult.hasNext()) {
        ReadResultEntry current = readResult.next();
        if (current.getType() != Cache) {
            // First non-cached entry: hand it back to the caller untouched.
            return current;
        }

        boolean contiguous = current.getStreamSegmentOffset() == nextOffset;
        Preconditions.checkState(contiguous, "Data returned from read was not contiguous.");

        // Fetch whatever is available right now, without blocking.
        ReadResultEntryContents contents = current.getContent().getNow(null);
        nextOffset += contents.getLength();
        cachedEntries.add(contents);
    }

    return null;
}
use of io.pravega.segmentstore.contracts.ReadResultEntry in project pravega by pravega.
the class ContainerReadIndexTests method testCacheEviction.
/**
* Tests the ability to evict entries from the ReadIndex under various conditions:
* * If an entry is aged out
* * If an entry is pushed out because of cache space pressure.
* <p>
* This also verifies that certain entries, such as RedirectReadIndexEntries and entries after the Storage Offset are
* not removed.
* <p>
* Notation: N is the number of entries per segment (entriesPerSegment) and offsets below are expressed in entries
* (each entry is appendSize bytes). A denotes the number of generations, which equals N in this test since one
* generation is applied per round of entries.
* <p>
* The way this test goes is as follows (it's pretty subtle, because there aren't many ways to hook into the ReadIndex and see what it's doing)
* 1. It creates a bunch of segments, and populates them in storage (each) up to offset 3N/4-1 (this is called pre-storage)
* 2. It populates the ReadIndex for each of those segments from offset 3N/4 to offset N-1 (this is called post-storage)
* 3. It loads all the data from Storage into the ReadIndex, in entries of size equal to those already loaded in step #2.
* 3a. At this point, all the entries added in step #2 have Generations 0..A/4-1, and step #3 have generations A/4..A-1
* 4. Append more data at the end. This forces the generation to increase to 1.25A.
* 4a. Nothing should be evicted from the cache now, since the earliest items are all post-storage.
* 5. We 'touch' (read) the first 1/3 of pre-storage entries (offsets 0..N/4).
* 5a. At this point, those entries (offsets 0..N/4) will have the newest generations (1.25A..1.5A)
* 6. We append more data (equivalent to the data we touched)
* 6a. Nothing should be evicted, since those generations that were just eligible for removal were touched and bumped up.
* 7. We forcefully increase the current generation by 1 (without touching the ReadIndex)
* 7a. At this point, we expect all the pre-storage items, except the touched ones, to be evicted. This is generations 0.25A-0.75A.
* 8. Update the metadata and indicate that all the post-storage entries are now pre-storage and bump the generation by 0.75A.
* 8a. At this point, we expect all former post-storage items and pre-storage items to be evicted (in this order).
* <p>
* The final order of eviction (in terms of offsets, for each segment), is:
* * 0.25N-0.75N, 0.75N..N, N..1.25N, 0..0.25N, 1.25N..1.5N (remember that we added quite a bunch of items after the initial run).
*/
@Test
@SuppressWarnings("checkstyle:CyclomaticComplexity")
public void testCacheEviction() throws Exception {
// Create a CachePolicy with a set number of generations and a known max size.
// Each generation contains exactly one entry, so the number of generations is also the number of entries.
final int appendSize = 100;
// This also doubles as number of generations (each generation, we add one append for each segment).
final int entriesPerSegment = 100;
final int cacheMaxSize = SEGMENT_COUNT * entriesPerSegment * appendSize;
// 25% of the entries are beyond the StorageOffset
final int postStorageEntryCount = entriesPerSegment / 4;
// 75% of the entries are before the StorageOffset.
final int preStorageEntryCount = entriesPerSegment - postStorageEntryCount;
// NOTE(review): the two Duration arguments presumably are the maximum entry age and the duration of one
// generation - confirm against CachePolicy.
CachePolicy cachePolicy = new CachePolicy(cacheMaxSize, Duration.ofMillis(1000 * 2 * entriesPerSegment), Duration.ofMillis(1000));
// To properly test this, we want predictable storage reads.
ReadIndexConfig config = ConfigHelpers.withInfiniteCachePolicy(ReadIndexConfig.builder().with(ReadIndexConfig.STORAGE_READ_ALIGNMENT, appendSize)).build();
ArrayList<CacheKey> removedKeys = new ArrayList<>();
@Cleanup TestContext context = new TestContext(config, cachePolicy);
// Record every cache removal.
context.cacheFactory.cache.removeCallback = removedKeys::add;
// Create the segments (metadata + storage).
ArrayList<Long> segmentIds = createSegments(context);
createSegmentsInStorage(context);
// Populate the Storage with appropriate data.
byte[] preStorageData = new byte[preStorageEntryCount * appendSize];
for (long segmentId : segmentIds) {
UpdateableSegmentMetadata sm = context.metadata.getStreamSegmentMetadata(segmentId);
val handle = context.storage.openWrite(sm.getName()).join();
context.storage.write(handle, 0, new ByteArrayInputStream(preStorageData), preStorageData.length, TIMEOUT).join();
// Both the storage length and the segment length now reflect the data written directly to Storage.
sm.setStorageLength(preStorageData.length);
sm.setLength(preStorageData.length);
}
// Callback that appends one entry at the end of the given segment id.
Consumer<Long> appendOneEntry = segmentId -> {
UpdateableSegmentMetadata sm = context.metadata.getStreamSegmentMetadata(segmentId);
byte[] data = new byte[appendSize];
long offset = sm.getLength();
// The metadata length is bumped before the data is added to the ReadIndex.
sm.setLength(offset + data.length);
try {
context.readIndex.append(segmentId, offset, data);
} catch (StreamSegmentNotExistsException ex) {
// Consumer.accept cannot throw checked exceptions, so wrap it.
throw new CompletionException(ex);
}
};
// Populate the ReadIndex with the Append entries (post-StorageOffset)
for (int i = 0; i < postStorageEntryCount; i++) {
segmentIds.forEach(appendOneEntry);
// Each time we make a round of appends (one per segment), we increment the generation in the CacheManager.
context.cacheManager.applyCachePolicy();
}
// Read all the data from Storage, making sure we carefully associate them with the proper generation.
for (int i = 0; i < preStorageEntryCount; i++) {
long offset = i * appendSize;
for (long segmentId : segmentIds) {
@Cleanup ReadResult result = context.readIndex.read(segmentId, offset, appendSize, TIMEOUT);
ReadResultEntry resultEntry = result.next();
Assert.assertEquals("Unexpected type of ReadResultEntry when trying to load up data into the ReadIndex Cache.", ReadResultEntryType.Storage, resultEntry.getType());
// Storage entries are not fetched automatically; explicitly request the content and wait for it.
resultEntry.requestContent(TIMEOUT);
ReadResultEntryContents contents = resultEntry.getContent().get(TIMEOUT.toMillis(), TimeUnit.MILLISECONDS);
Assert.assertFalse("Not expecting more data to be available for reading.", result.hasNext());
Assert.assertEquals("Unexpected ReadResultEntry length when trying to load up data into the ReadIndex Cache.", appendSize, contents.getLength());
}
// One generation per round of storage reads (one read per segment).
context.cacheManager.applyCachePolicy();
}
Assert.assertEquals("Not expecting any removed Cache entries at this point (cache is not full).", 0, removedKeys.size());
// Append more data (equivalent to all post-storage entries), and verify that NO entries are being evicted (we cannot evict post-storage entries).
for (int i = 0; i < postStorageEntryCount; i++) {
segmentIds.forEach(appendOneEntry);
context.cacheManager.applyCachePolicy();
}
Assert.assertEquals("Not expecting any removed Cache entries at this point (only eligible entries were post-storage).", 0, removedKeys.size());
// 'Touch' the first few entries read from storage. This should move them to the back of the queue (they won't be the first ones to be evicted).
int touchCount = preStorageEntryCount / 3;
for (int i = 0; i < touchCount; i++) {
long offset = i * appendSize;
for (long segmentId : segmentIds) {
@Cleanup ReadResult result = context.readIndex.read(segmentId, offset, appendSize, TIMEOUT);
ReadResultEntry resultEntry = result.next();
// The data was loaded from Storage earlier, so this time it must be served from the Cache.
Assert.assertEquals("Unexpected type of ReadResultEntry when trying to load up data into the ReadIndex Cache.", ReadResultEntryType.Cache, resultEntry.getType());
}
}
// Append more data (equivalent to the amount of data we 'touched'), and verify that the entries we just touched are not being removed.
for (int i = 0; i < touchCount; i++) {
segmentIds.forEach(appendOneEntry);
context.cacheManager.applyCachePolicy();
}
Assert.assertEquals("Not expecting any removed Cache entries at this point (we touched old entries and they now have the newest generation).", 0, removedKeys.size());
// Increment the generations so that we are caught up to just before the generation where the "touched" items now live.
context.cacheManager.applyCachePolicy();
// We expect all but the 'touchCount' pre-Storage entries to be removed.
int expectedRemovalCount = (preStorageEntryCount - touchCount) * SEGMENT_COUNT;
Assert.assertEquals("Unexpected number of removed entries after having forced out all pre-storage entries.", expectedRemovalCount, removedKeys.size());
// Now update the metadata and indicate that all the post-storage data has been moved to storage.
segmentIds.forEach(segmentId -> {
UpdateableSegmentMetadata sm = context.metadata.getStreamSegmentMetadata(segmentId);
sm.setStorageLength(sm.getLength());
});
// We add one artificial entry, which we'll be touching forever and ever; this forces the CacheManager to
// update its current generation every time. We will be ignoring this entry for our test.
SegmentMetadata readSegment = context.metadata.getStreamSegmentMetadata(segmentIds.get(0));
appendOneEntry.accept(readSegment.getId());
// Now evict everything (whether by size or by aging out).
for (int i = 0; i < cachePolicy.getMaxGenerations(); i++) {
// Read (touch) the artificial entry, which is the last appendSize bytes of the segment.
@Cleanup ReadResult result = context.readIndex.read(readSegment.getId(), readSegment.getLength() - appendSize, appendSize, TIMEOUT);
result.next();
context.cacheManager.applyCachePolicy();
}
// Per segment: the original N entries, plus the two extra rounds of appends (postStorageEntryCount and touchCount).
int expectedRemovalCountPerSegment = entriesPerSegment + touchCount + postStorageEntryCount;
int expectedTotalRemovalCount = SEGMENT_COUNT * expectedRemovalCountPerSegment;
Assert.assertEquals("Unexpected number of removed entries after having forced out all the entries.", expectedTotalRemovalCount, removedKeys.size());
// Finally, verify that the evicted items are in the correct order (for each segment). See this test's description for details.
for (long segmentId : segmentIds) {
List<CacheKey> segmentRemovedKeys = removedKeys.stream().filter(key -> key.getStreamSegmentId() == segmentId).collect(Collectors.toList());
Assert.assertEquals("Unexpected number of removed entries for segment " + segmentId, expectedRemovalCountPerSegment, segmentRemovedKeys.size());
// The correct order of eviction (N=entriesPerSegment) is: 0.25N-0.75N, 0.75N..N, N..1.25N, 0..0.25N, 1.25N..1.5N.
// This is equivalent to the following tests
// NOTE(review): checkOffsets arguments appear to be (keys, segmentId, first index in the removed list,
// number of keys to check, expected starting offset in bytes, offset increment) - confirm against the helper.
// 0.25N-1.25N
checkOffsets(segmentRemovedKeys, segmentId, 0, entriesPerSegment, entriesPerSegment * appendSize / 4, appendSize);
// 0..0.25N
checkOffsets(segmentRemovedKeys, segmentId, entriesPerSegment, entriesPerSegment / 4, 0, appendSize);
// 1.25N..1.5N
checkOffsets(segmentRemovedKeys, segmentId, entriesPerSegment + entriesPerSegment / 4, entriesPerSegment / 4, (int) (entriesPerSegment * appendSize * 1.25), appendSize);
}
}
use of io.pravega.segmentstore.contracts.ReadResultEntry in project pravega by pravega.
the class ContainerReadIndexTests method testBatchedRead.
/**
 * Verifies that the ReadIndex coalesces many small index entries into larger read results. The test adds a large
 * number of tiny appends to the index, then reads the whole segment from offset 0 with the index configured to
 * return cache results of at least a given minimum length. The tail of the segment is deliberately left out of
 * the index (it exists only in Storage), so that the last read must be a Storage read which must not be merged
 * into the preceding cache results.
 */
@Test
public void testBatchedRead() throws Exception {
    final int totalAppendLength = 500 * 1000;
    final int maxAppendLength = 100;
    final int minReadLength = 16 * 1024;

    // Deterministic pseudo-random payload; the same generator also drives the append sizes below.
    final byte[] segmentData = new byte[totalAppendLength];
    final Random rnd = new Random(0);
    rnd.nextBytes(segmentData);

    final ReadIndexConfig config = ConfigHelpers
            .withInfiniteCachePolicy(ReadIndexConfig.builder().with(ReadIndexConfig.MEMORY_READ_MIN_LENGTH, minReadLength))
            .build();
    @Cleanup TestContext context = new TestContext(config, config.getCachePolicy());

    // A single segment suffices. Create it, then write the entire payload to Storage.
    final long segmentId = createSegment(0, context);
    createSegmentsInStorage(context);
    final UpdateableSegmentMetadata segmentMetadata = context.metadata.getStreamSegmentMetadata(segmentId);
    val writeHandle = context.storage.openWrite(segmentMetadata.getName()).join();
    context.storage.write(writeHandle, 0, new ByteArrayInputStream(segmentData), segmentData.length, TIMEOUT).join();
    segmentMetadata.setStorageLength(segmentData.length);

    // Feed the same payload to the read index in tiny, randomly-sized appends.
    int lastCacheOffset = -1;
    int writtenLength = 0;
    while (writtenLength < totalAppendLength) {
        int remainingLength = totalAppendLength - writtenLength;
        int appendLength = rnd.nextInt(maxAppendLength) + 1;
        if (appendLength >= remainingLength) {
            // This would be the final append. Skip it (only extend the metadata length) so that the read index
            // has to fetch this range from Storage.
            lastCacheOffset = writtenLength;
            writtenLength += remainingLength;
            segmentMetadata.setLength(writtenLength);
        } else {
            byte[] appendData = new byte[appendLength];
            System.arraycopy(segmentData, writtenLength, appendData, 0, appendLength);
            appendSingleWrite(segmentId, appendData, context);
            writtenLength += appendLength;
        }
    }

    // Read everything back and validate type, contents and size of each entry.
    @Cleanup ReadResult readResult = context.readIndex.read(segmentId, 0, totalAppendLength, TIMEOUT);
    long expectedCurrentOffset = 0;
    boolean encounteredStorageRead = false;
    while (readResult.hasNext()) {
        ReadResultEntry entry = readResult.next();
        boolean beforeSwitchOffset = entry.getStreamSegmentOffset() < lastCacheOffset;
        if (beforeSwitchOffset) {
            Assert.assertEquals("Expecting only a Cache entry before switch offset.", ReadResultEntryType.Cache, entry.getType());
        } else {
            Assert.assertEquals("Expecting only a Storage entry on or after switch offset.", ReadResultEntryType.Storage, entry.getType());
            // Storage entries need their content requested explicitly; wait for it to arrive.
            entry.requestContent(TIMEOUT);
            entry.getContent().get(TIMEOUT.toMillis(), TimeUnit.MILLISECONDS);
            encounteredStorageRead = true;
        }

        // Compare the entry's payload against the matching slice of the original data.
        ReadResultEntryContents contents = entry.getContent().join();
        byte[] entryData = new byte[contents.getLength()];
        StreamHelpers.readAll(contents.getData(), entryData, 0, entryData.length);
        AssertExtensions.assertArrayEquals("Unexpected data read at offset " + expectedCurrentOffset, segmentData, (int) expectedCurrentOffset, entryData, 0, entryData.length);
        expectedCurrentOffset += entryData.length;

        // Every cache hit must honor the configured minimum length, except for the last one before the switch
        // offset, which may be cut short by the storage entry.
        if (expectedCurrentOffset < lastCacheOffset) {
            AssertExtensions.assertGreaterThanOrEqual("Expecting a ReadResultEntry of a minimum length for cache hit.", minReadLength, entryData.length);
        }
    }

    Assert.assertEquals("Not encountered any storage reads, even though one was forced.", lastCacheOffset > 0, encounteredStorageRead);
}
use of io.pravega.segmentstore.contracts.ReadResultEntry in project pravega by pravega.
the class ContainerReadIndexTests method testStorageReadTransactionNoCache.
// region Scenario-based tests
/**
 * Tests the following Scenario, where the ReadIndex would either read from a bad offset or fail with an invalid offset
 * when reading in certain conditions:
 * * A segment has a transaction, which has N bytes written to it.
 * * The transaction is merged into its parent segment at offset M &gt; N.
 * * At least one byte of the transaction is evicted from the cache
 * * A read is issued to the parent segment for that byte that was evicted
 * * The ReadIndex is supposed to issue a Storage Read with an offset inside the transaction range (so translate
 * from the parent's offset to the transaction's offset). However, after the read, it is supposed to look like the
 * data was read from the parent segment, so it should not expose the adjusted offset at all.
 * <p>
 * This very specific unit test is a result of a regression found during testing.
 */
@Test
public void testStorageReadTransactionNoCache() throws Exception {
    // A cache policy this small makes the applyCachePolicy() call below evict the cached data.
    CachePolicy cachePolicy = new CachePolicy(1, Duration.ZERO, Duration.ofMillis(1));
    @Cleanup TestContext context = new TestContext(DEFAULT_CONFIG, cachePolicy);

    // Create parent segment and one transaction
    long parentId = createSegment(0, context);
    UpdateableSegmentMetadata parentMetadata = context.metadata.getStreamSegmentMetadata(parentId);
    long transactionId = createTransaction(parentMetadata, 1, context);
    UpdateableSegmentMetadata transactionMetadata = context.metadata.getStreamSegmentMetadata(transactionId);
    createSegmentsInStorage(context);

    // Write something to the transaction, and make sure it also makes its way to Storage.
    byte[] transactionWriteData = getAppendData(transactionMetadata.getName(), transactionId, 0, 0);
    appendSingleWrite(transactionId, transactionWriteData, context);
    val handle = context.storage.openWrite(transactionMetadata.getName()).join();
    context.storage.write(handle, 0, new ByteArrayInputStream(transactionWriteData), transactionWriteData.length, TIMEOUT).join();
    transactionMetadata.setStorageLength(transactionMetadata.getLength());

    // Write some data to the parent, and make sure it is more than what we write to the transaction (hence the 10).
    for (int i = 0; i < 10; i++) {
        byte[] parentWriteData = getAppendData(parentMetadata.getName(), parentId, i, i);
        appendSingleWrite(parentId, parentWriteData, context);
    }

    // Seal & Begin-merge the transaction (do not seal in storage).
    transactionMetadata.markSealed();
    long mergeOffset = parentMetadata.getLength();
    parentMetadata.setLength(mergeOffset + transactionMetadata.getLength());
    context.readIndex.beginMerge(parentId, mergeOffset, transactionId);
    transactionMetadata.markMerged();

    // Clear the cache.
    context.cacheManager.applyCachePolicy();

    // Issue read from the parent, covering exactly the merged transaction range. The result is closed when the
    // test exits, like every other ReadResult in this test class.
    @Cleanup ReadResult rr = context.readIndex.read(parentId, mergeOffset, transactionWriteData.length, TIMEOUT);
    Assert.assertTrue("Parent Segment read indicates no data available.", rr.hasNext());
    ByteArrayOutputStream readStream = new ByteArrayOutputStream();
    long expectedOffset = mergeOffset;
    while (rr.hasNext()) {
        ReadResultEntry entry = rr.next();
        // Offsets must be reported in the parent's address space, not the transaction's.
        Assert.assertEquals("Unexpected offset for read result entry.", expectedOffset, entry.getStreamSegmentOffset());
        Assert.assertEquals("Served read result entry is not from storage.", ReadResultEntryType.Storage, entry.getType());

        // Request contents and store for later use.
        entry.requestContent(TIMEOUT);
        ReadResultEntryContents contents = entry.getContent().get(TIMEOUT.toMillis(), TimeUnit.MILLISECONDS);
        byte[] readBuffer = new byte[contents.getLength()];
        StreamHelpers.readAll(contents.getData(), readBuffer, 0, readBuffer.length);
        readStream.write(readBuffer);
        expectedOffset += contents.getLength();
    }

    // What was read from the parent at the merge offset must be exactly what was written to the transaction.
    byte[] readData = readStream.toByteArray();
    Assert.assertArrayEquals("Unexpected data read back.", transactionWriteData, readData);
}
Aggregations