use of io.pravega.segmentstore.storage.metadata.MetadataTransaction in project pravega by pravega.
the class ChunkedSegmentStorage method claimOwnership.
/**
 * Checks ownership and adjusts the length of the segment if required.
 *
 * <p>When appends are enabled and the segment has a last chunk, the length recorded in metadata for that
 * chunk is compared with the length reported by chunk storage. If they differ, the chunk and segment
 * lengths in metadata are updated to the length found in storage. Finally the segment metadata is
 * updated in the transaction and the transaction is committed.
 *
 * <p>Failures are reported through the returned future rather than thrown directly: chunk storage
 * related errors (ChunkStorageException) and chunk metadata store related errors
 * (StorageMetadataException) complete the future exceptionally.
 *
 * @param txn Active {@link MetadataTransaction}.
 * @param segmentMetadata {@link SegmentMetadata} for the segment to change ownership for.
 * @return A CompletableFuture that completes when the commit of {@code txn} completes.
 */
private CompletableFuture<Void> claimOwnership(MetadataTransaction txn, SegmentMetadata segmentMetadata) {
    // Get the last chunk
    val lastChunkName = segmentMetadata.getLastChunk();
    final CompletableFuture<Boolean> f;
    if (shouldAppend() && null != lastChunkName) {
        f = txn.get(lastChunkName).thenComposeAsync(storageMetadata -> {
            val lastChunk = (ChunkMetadata) storageMetadata;
            Preconditions.checkState(null != lastChunk, "last chunk metadata must not be null.");
            Preconditions.checkState(null != lastChunk.getName(), "Name of last chunk must not be null.");
            log.debug("{} claimOwnership - current last chunk - segment={}, last chunk={}, Length={}.", logPrefix, segmentMetadata.getName(), lastChunk.getName(), lastChunk.getLength());
            return chunkStorage.getInfo(lastChunkName).thenApplyAsync(chunkInfo -> {
                // lastChunk was already verified to be non-null above, so only chunkInfo needs checking here.
                Preconditions.checkState(chunkInfo != null, "chunkInfo for last chunk must not be null.");
                // Adjust its length;
                if (chunkInfo.getLength() != lastChunk.getLength()) {
                    Preconditions.checkState(chunkInfo.getLength() > lastChunk.getLength(), "Length of last chunk on LTS must be greater than what is in metadata. Chunk=%s length=%s", lastChunk, chunkInfo.getLength());
                    // Whatever length you see right now is the final "sealed" length of the last chunk.
                    val oldLength = segmentMetadata.getLength();
                    lastChunk.setLength(chunkInfo.getLength());
                    segmentMetadata.setLength(segmentMetadata.getLastChunkStartOffset() + lastChunk.getLength());
                    if (!segmentMetadata.isStorageSystemSegment()) {
                        addBlockIndexEntriesForChunk(txn, segmentMetadata.getName(), lastChunk.getName(), segmentMetadata.getLastChunkStartOffset(), oldLength, segmentMetadata.getLength());
                    }
                    txn.update(lastChunk);
                    log.debug("{} claimOwnership - Length of last chunk adjusted - segment={}, last chunk={}, Length={}.", logPrefix, segmentMetadata.getName(), lastChunk.getName(), chunkInfo.getLength());
                }
                return true;
            }, executor).exceptionally(e -> {
                val ex = Exceptions.unwrap(e);
                if (ex instanceof ChunkNotFoundException) {
                    // This probably means that this instance is fenced out and newer instance truncated this segment.
                    // Try a commit of unmodified data to fail fast.
                    log.debug("{} claimOwnership - Last chunk was missing, failing fast - segment={}, last chunk={}.", logPrefix, segmentMetadata.getName(), lastChunk.getName());
                    txn.update(segmentMetadata);
                    // false => do not stamp this instance's epoch on the segment below.
                    return false;
                }
                throw new CompletionException(ex);
            });
        }, executor);
    } else {
        // Nothing to reconcile against chunk storage; ownership can be claimed directly.
        f = CompletableFuture.completedFuture(true);
    }
    return f.thenComposeAsync(shouldChange -> {
        // If this instance is no more owner, then transaction commit will fail. So it is still safe.
        if (shouldChange) {
            segmentMetadata.setOwnerEpoch(this.epoch);
            segmentMetadata.setOwnershipChanged(true);
        }
        // Update and commit
        // If this instance is fenced this update will fail.
        txn.update(segmentMetadata);
        return txn.commit();
    }, executor);
}
use of io.pravega.segmentstore.storage.metadata.MetadataTransaction in project pravega by pravega.
the class SystemJournal method applySystemLogOperations.
/**
 * Process all systemLog entries to recreate the state of metadata storage system segments.
 *
 * <p>Scanning starts at the epoch and (file index + 1) recorded in {@code systemSnapshotRecord} when one is
 * given, otherwise at epoch 0, file index 1. Every epoch strictly below this instance's {@code epoch} is
 * scanned. Within an epoch, journal files are read one at a time by increasing index; a missing journal
 * ({@link ChunkNotFoundException}) is tolerated until more than {@code config.getMaxJournalWriteAttempts()}
 * consecutive indexes are missing, at which point the scan of that epoch ends. Any other failure fails
 * the returned future.
 *
 * @param txn Active {@link MetadataTransaction}, passed on to {@code processJournalContents}.
 * @param state Bootstrap state, passed on to {@code processJournalContents}; its
 *              {@code filesProcessedCount} is incremented each time the scan advances to the next index.
 * @param systemSnapshotRecord Snapshot to start scanning after; may be null.
 * @return A CompletableFuture that completes once all epochs have been scanned and the names of the
 *         journals that were read have been added to {@code pendingGarbageChunks}.
 */
private CompletableFuture<Void> applySystemLogOperations(MetadataTransaction txn, BootstrapState state, SystemSnapshotRecord systemSnapshotRecord) {
    val epochToStartScanning = new AtomicLong();
    val fileIndexToRecover = new AtomicInteger(1);
    val journalsProcessed = Collections.synchronizedList(new ArrayList<String>());
    // Starting with journal file after last snapshot,
    if (null != systemSnapshotRecord) {
        epochToStartScanning.set(systemSnapshotRecord.epoch);
        fileIndexToRecover.set(systemSnapshotRecord.fileIndex + 1);
    }
    log.debug("SystemJournal[{}] Applying journal operations. Starting at epoch={} journal index={}", containerId, epochToStartScanning.get(), fileIndexToRecover.get());
    // Linearly read and apply all the journal files after snapshot.
    val epochToRecover = new AtomicLong(epochToStartScanning.get());
    return Futures.loop(() -> epochToRecover.get() < epoch, () -> {
        // Start scan with file index 1 if epoch is later than snapshot.
        if (epochToRecover.get() > epochToStartScanning.get()) {
            fileIndexToRecover.set(1);
        }
        // Process one journal at a time.
        val scanAhead = new AtomicInteger();
        val isScanDone = new AtomicBoolean();
        return Futures.loop(() -> !isScanDone.get(), () -> {
            val systemLogName = getSystemJournalChunkName(containerId, epochToRecover.get(), fileIndexToRecover.get());
            return getContents(systemLogName, true).thenApplyAsync(contents -> {
                // We successfully read the contents.
                journalsProcessed.add(systemLogName);
                // Reset scan ahead counter.
                scanAhead.set(0);
                return contents;
            }, executor).thenComposeAsync(contents -> processJournalContents(txn, state, systemLogName, new ByteArrayInputStream(contents)), executor).handleAsync((v, e) -> {
                if (null != e) {
                    val ex = Exceptions.unwrap(e);
                    if (ex instanceof ChunkNotFoundException) {
                        // Journal chunk does not exist.
                        log.debug("SystemJournal[{}] Journal does not exist for epoch={}. Last journal index={}", containerId, epochToRecover.get(), fileIndexToRecover.get());
                        // Check whether we have reached end of our scanning (including scan ahead).
                        if (scanAhead.incrementAndGet() > config.getMaxJournalWriteAttempts()) {
                            isScanDone.set(true);
                            log.debug("SystemJournal[{}] Done applying journal operations for epoch={}. Last journal index={}", containerId, epochToRecover.get(), fileIndexToRecover.get());
                            return null;
                        }
                    } else {
                        // Rethrow the unwrapped cause so that callers do not see a
                        // CompletionException nested inside another CompletionException.
                        throw new CompletionException(ex);
                    }
                }
                // Move to next journal.
                fileIndexToRecover.incrementAndGet();
                state.filesProcessedCount.incrementAndGet();
                return v;
            }, executor);
        }, executor);
    }, v -> epochToRecover.incrementAndGet(), executor).thenRunAsync(() -> pendingGarbageChunks.addAll(journalsProcessed), executor);
}
use of io.pravega.segmentstore.storage.metadata.MetadataTransaction in project pravega by pravega.
the class SystemJournal method applyChunkAddition.
/**
 * Apply chunk addition.
 *
 * <p>Creates (and pins) metadata for {@code newChunkName}, sets the segment length to {@code offset} and
 * makes the new chunk the segment's last chunk. When {@code oldChunkName} is non-empty, every chunk that
 * follows it in the chunk list is deleted from the transaction, the old chunk is linked to the new chunk
 * and its length is set to {@code offset} minus its recorded start offset. When {@code oldChunkName} is
 * empty, the new chunk also becomes the segment's first chunk and the segment must have no chunks.
 *
 * @param txn Active {@link MetadataTransaction}.
 * @param chunkStartOffsets Map of chunk name to start offset; the new chunk is recorded here, and the
 *                          entry for {@code oldChunkName} is read when {@code oldChunkName} is non-empty.
 * @param segmentName Name of the segment the chunk is added to.
 * @param oldChunkName Name of the chunk after which the new chunk is added. Must not be null; empty when
 *                     there is no preceding chunk.
 * @param newChunkName Name of the chunk being added. Must not be null or empty.
 * @param offset Start offset of the new chunk in the segment.
 * @return A CompletableFuture that completes when the metadata changes have been applied to {@code txn}
 *         (and, when self check is enabled, the segment has been validated).
 */
private CompletableFuture<Void> applyChunkAddition(MetadataTransaction txn, Map<String, Long> chunkStartOffsets, String segmentName, String oldChunkName, String newChunkName, long offset) {
    Preconditions.checkState(null != oldChunkName, "oldChunkName must not be null");
    Preconditions.checkState(null != newChunkName && !newChunkName.isEmpty(), "newChunkName must not be null or empty");
    return txn.get(segmentName).thenComposeAsync(m -> {
        val segmentMetadata = (SegmentMetadata) m;
        segmentMetadata.checkInvariants();
        // NOTE(review): the value returned here is discarded, while the call at the end of this method
        // returns it as a future - so this validation is presumably not awaited. Confirm this is intended.
        validateSegment(txn, segmentName);
        // set length.
        segmentMetadata.setLength(offset);
        val newChunkMetadata = ChunkMetadata.builder().name(newChunkName).build();
        newChunkMetadata.setActive(true);
        txn.create(newChunkMetadata);
        txn.markPinned(newChunkMetadata);
        chunkStartOffsets.put(newChunkName, offset);
        CompletableFuture<Void> f;
        // Set first and last pointers.
        if (!oldChunkName.isEmpty()) {
            Preconditions.checkState(txn.getData().containsKey(oldChunkName), "Txn must contain old key. oldChunkName=%s", oldChunkName);
            f = txn.get(oldChunkName).thenComposeAsync(mm -> {
                val oldChunk = (ChunkMetadata) mm;
                Preconditions.checkState(null != oldChunk, "oldChunk must not be null. oldChunkName=%s", oldChunkName);
                // In case the old segment store was still writing some zombie chunks when ownership changed
                // then new offset may invalidate tail part of chunk list.
                // Note that chunk with oldChunkName is still valid, it is the chunks after this that become invalid.
                val toDelete = new AtomicReference<>(oldChunk.getNextChunk());
                return Futures.loop(() -> toDelete.get() != null, () -> txn.get(toDelete.get()).thenAcceptAsync(mmm -> {
                    val chunkToDelete = (ChunkMetadata) mmm;
                    txn.delete(toDelete.get());
                    segmentMetadata.setChunkCount(segmentMetadata.getChunkCount() - 1);
                    // move to next chunk in list of now zombie chunks
                    toDelete.set(chunkToDelete.getNextChunk());
                }, executor), executor).thenAcceptAsync(v -> {
                    // Set next chunk
                    oldChunk.setNextChunk(newChunkName);
                    // Set length: the old chunk ends where the new chunk starts.
                    val oldChunkStartOffset = chunkStartOffsets.get(oldChunkName);
                    oldChunk.setLength(offset - oldChunkStartOffset);
                    txn.update(oldChunk);
                }, executor);
            }, executor);
        } else {
            segmentMetadata.setFirstChunk(newChunkName);
            segmentMetadata.setStartOffset(offset);
            Preconditions.checkState(segmentMetadata.getChunkCount() == 0, "Chunk count must be 0. %s", segmentMetadata);
            f = CompletableFuture.completedFuture(null);
        }
        return f.thenComposeAsync(v -> {
            segmentMetadata.setLastChunk(newChunkName);
            segmentMetadata.setLastChunkStartOffset(offset);
            segmentMetadata.setChunkCount(segmentMetadata.getChunkCount() + 1);
            segmentMetadata.checkInvariants();
            // Save the segment metadata.
            txn.update(segmentMetadata);
            if (config.isSelfCheckEnabled()) {
                return validateSegment(txn, segmentName);
            } else {
                return CompletableFuture.completedFuture(null);
            }
        }, executor);
    }, executor);
}
use of io.pravega.segmentstore.storage.metadata.MetadataTransaction in project pravega by pravega.
the class SystemJournal method adjustLastChunkLengths.
/**
 * Adjusts the lengths of last chunks for each segment.
 *
 * <p>For every name in {@code systemSegments}, the segment metadata is read from {@code txn}. If the
 * segment has a last chunk, that chunk's length is queried from chunk storage and written to the chunk
 * metadata, and the segment length is set to the last chunk's start offset plus that length. Each
 * segment's metadata is then updated in the transaction.
 *
 * @param txn Active {@link MetadataTransaction}.
 * @return A CompletableFuture that completes when all system segments have been processed. It fails
 *         if any segment does not have {@code ownershipChanged} set.
 */
private CompletableFuture<Void> adjustLastChunkLengths(MetadataTransaction txn) {
    val futures = new ArrayList<CompletableFuture<Void>>();
    for (val systemSegment : systemSegments) {
        val f = txn.get(systemSegment).thenComposeAsync(m -> {
            val segmentMetadata = (SegmentMetadata) m;
            segmentMetadata.checkInvariants();
            CompletableFuture<Void> ff;
            // Update length of last chunk in metadata to what we actually find on LTS.
            if (null != segmentMetadata.getLastChunk()) {
                ff = chunkStorage.getInfo(segmentMetadata.getLastChunk()).thenComposeAsync(chunkInfo -> {
                    long length = chunkInfo.getLength();
                    return txn.get(segmentMetadata.getLastChunk()).thenAcceptAsync(mm -> {
                        val lastChunk = (ChunkMetadata) mm;
                        Preconditions.checkState(null != lastChunk, "lastChunk must not be null. Segment=%s", segmentMetadata);
                        lastChunk.setLength(length);
                        txn.update(lastChunk);
                        val newLength = segmentMetadata.getLastChunkStartOffset() + length;
                        segmentMetadata.setLength(newLength);
                        // Argument order follows the labels: segment name, segment length, chunk name, chunk length.
                        log.debug("SystemJournal[{}] Adjusting length of last chunk segment. segment={}, length={} chunk={}, chunk length={}", containerId, segmentMetadata.getName(), newLength, lastChunk.getName(), length);
                    }, executor);
                }, executor);
            } else {
                // Segment has no chunks, so there is nothing to adjust.
                ff = CompletableFuture.completedFuture(null);
            }
            return ff.thenApplyAsync(v -> {
                Preconditions.checkState(segmentMetadata.isOwnershipChanged(), "ownershipChanged must be true. Segment=%s", segmentMetadata);
                segmentMetadata.checkInvariants();
                return segmentMetadata;
            }, executor);
        }, executor).thenAcceptAsync(segmentMetadata -> txn.update(segmentMetadata), executor);
        futures.add(f);
    }
    return Futures.allOf(futures);
}
use of io.pravega.segmentstore.storage.metadata.MetadataTransaction in project pravega by pravega.
the class ReadOperation method findChunkForOffset.
/**
 * Finds the chunk that contains the first byte to read and stores it in {@code chunkToReadFrom}.
 *
 * <p>The read state ({@code bytesRemaining}, {@code currentBufferOffset}, {@code currentOffset},
 * {@code totalBytesRead}) is reset first. The starting point for the scan is then chosen as follows:
 * <ul>
 * <li>the last chunk, when {@code offset} is at or beyond the last chunk's start offset;</li>
 * <li>otherwise the first chunk, or a later floor entry from the read index cache if one exists;</li>
 * <li>for non-system segments, the chunk named by the read index block entry for the floor block of
 *     {@code offset}, when the current starting point lies before that block and the entry's start
 *     offset is not greater than {@code offset}.</li>
 * </ul>
 * From that starting point the chunk list is walked forward until a chunk containing
 * {@code currentOffset} is found or the list ends. Each newly visited chunk is added to the read index
 * cache, and scan latency and the number of chunks scanned are reported as metrics.
 *
 * @param txn Active {@link MetadataTransaction} used to read chunk and index block metadata.
 * @return A CompletableFuture that completes when the lookup is done.
 */
private CompletableFuture<Void> findChunkForOffset(MetadataTransaction txn) {
    currentChunkName = segmentMetadata.getFirstChunk();
    chunkToReadFrom = null;
    Preconditions.checkState(null != currentChunkName, "currentChunkName must not be null. Segment=%s", segmentMetadata.getName());
    bytesRemaining.set(length);
    currentBufferOffset.set(bufferOffset);
    currentOffset.set(offset);
    totalBytesRead.set(0);
    // Find the first chunk that contains the data.
    startOffsetForCurrentChunk.set(segmentMetadata.getFirstChunkStartOffset());
    boolean shouldOnlyReadLastChunk = offset >= segmentMetadata.getLastChunkStartOffset();
    if (shouldOnlyReadLastChunk) {
        startOffsetForCurrentChunk.set(segmentMetadata.getLastChunkStartOffset());
        currentChunkName = segmentMetadata.getLastChunk();
    } else {
        // Find the name of the chunk in the cached read index that is floor to required offset.
        val floorEntry = chunkedSegmentStorage.getReadIndexCache().findFloor(handle.getSegmentName(), offset);
        if (null != floorEntry && startOffsetForCurrentChunk.get() < floorEntry.getOffset() && null != floorEntry.getChunkName()) {
            startOffsetForCurrentChunk.set(floorEntry.getOffset());
            currentChunkName = floorEntry.getChunkName();
        }
    }
    final long floorBlockStartOffset = getFloorBlockStartOffset(offset);
    CompletableFuture<Void> f;
    if (!shouldOnlyReadLastChunk && !segmentMetadata.isStorageSystemSegment() && startOffsetForCurrentChunk.get() < floorBlockStartOffset) {
        val indexLookupTimer = new Timer();
        f = txn.get(NameUtils.getSegmentReadIndexBlockName(segmentMetadata.getName(), floorBlockStartOffset)).thenAcceptAsync(storageMetadata -> {
            if (null != storageMetadata) {
                ReadIndexBlockMetadata blockMetadata = (ReadIndexBlockMetadata) storageMetadata;
                if (blockMetadata.getStartOffset() <= offset) {
                    startOffsetForCurrentChunk.set(blockMetadata.getStartOffset());
                    currentChunkName = blockMetadata.getChunkName();
                    log.debug("{} read - found block index to start scanning - op={}, segment={}, chunk={}, startOffset={}, offset={}.", chunkedSegmentStorage.getLogPrefix(), System.identityHashCode(this), handle.getSegmentName(), currentChunkName, startOffsetForCurrentChunk.get(), offset);
                    // Note: This just is prefetch call. Do not wait.
                    val nextBlock = getFloorBlockStartOffset(offset + length);
                    if (nextBlock > floorBlockStartOffset + chunkedSegmentStorage.getConfig().getIndexBlockSize()) {
                        // We read multiple blocks already
                        txn.get(NameUtils.getSegmentReadIndexBlockName(segmentMetadata.getName(), nextBlock));
                    } else {
                        // Prefetch next block index entry.
                        txn.get(NameUtils.getSegmentReadIndexBlockName(segmentMetadata.getName(), floorBlockStartOffset + chunkedSegmentStorage.getConfig().getIndexBlockSize()));
                    }
                } else {
                    log.warn("{} read - block entry offset must be floor to requested offset. op={} segment={} offset={} length={} block={}", chunkedSegmentStorage.getLogPrefix(), System.identityHashCode(this), segmentMetadata, offset, length, blockMetadata);
                }
            }
            // This lookup is only performed for non-system segments (see the guard above),
            // so only the non-system latency metric applies here.
            SLTS_READ_INDEX_BLOCK_LOOKUP_LATENCY.reportSuccessEvent(indexLookupTimer.getElapsed());
        }, chunkedSegmentStorage.getExecutor());
    } else {
        f = CompletableFuture.completedFuture(null);
    }
    val readIndexTimer = new Timer();
    // Navigate to the chunk that contains the first byte of requested data.
    return f.thenComposeAsync(vv -> Futures.loop(() -> currentChunkName != null && !isLoopExited, () -> txn.get(currentChunkName).thenAcceptAsync(storageMetadata -> {
        chunkToReadFrom = (ChunkMetadata) storageMetadata;
        Preconditions.checkState(null != chunkToReadFrom, "chunkToReadFrom is null. currentChunkName=%s Segment=%s", currentChunkName, segmentMetadata.getName());
        if (startOffsetForCurrentChunk.get() <= currentOffset.get() && startOffsetForCurrentChunk.get() + chunkToReadFrom.getLength() > currentOffset.get()) {
            // we have found a chunk that contains first byte we want to read
            log.debug("{} read - found chunk to read - op={}, segment={}, chunk={}, startOffset={}, length={}, readOffset={}.", chunkedSegmentStorage.getLogPrefix(), System.identityHashCode(this), handle.getSegmentName(), chunkToReadFrom, startOffsetForCurrentChunk.get(), chunkToReadFrom.getLength(), currentOffset);
            isLoopExited = true;
            return;
        }
        // Not in this chunk: advance to the next chunk, whose start offset is the end of this one.
        currentChunkName = chunkToReadFrom.getNextChunk();
        startOffsetForCurrentChunk.addAndGet(chunkToReadFrom.getLength());
        // Update read index with newly visited chunk.
        if (null != currentChunkName) {
            chunkedSegmentStorage.getReadIndexCache().addIndexEntry(handle.getSegmentName(), currentChunkName, startOffsetForCurrentChunk.get());
        }
        cntScanned.incrementAndGet();
    }, chunkedSegmentStorage.getExecutor()), chunkedSegmentStorage.getExecutor()).thenAcceptAsync(v -> {
        val elapsed = readIndexTimer.getElapsed();
        if (segmentMetadata.isStorageSystemSegment()) {
            SLTS_SYS_READ_INDEX_SCAN_LATENCY.reportSuccessEvent(elapsed);
            SLTS_SYS_READ_INDEX_NUM_SCANNED.reportSuccessValue(cntScanned.get());
        } else {
            SLTS_READ_INDEX_SCAN_LATENCY.reportSuccessEvent(elapsed);
            SLTS_READ_INDEX_NUM_SCANNED.reportSuccessValue(cntScanned.get());
        }
        // Prefetch possible chunks for next read.
        if (chunkToReadFrom.getNextChunk() != null) {
            // Do not wait.
            txn.get(chunkToReadFrom.getNextChunk());
        }
        log.debug("{} read - chunk lookup - op={}, segment={}, offset={}, scanned={}, latency={}.", chunkedSegmentStorage.getLogPrefix(), System.identityHashCode(this), handle.getSegmentName(), offset, cntScanned.get(), elapsed.toMillis());
    }, chunkedSegmentStorage.getExecutor()), chunkedSegmentStorage.getExecutor());
}
Aggregations