use of io.pravega.segmentstore.server.tables.WriterTableProcessor in project pravega by pravega.
From the class StreamSegmentContainerTests, method testTableSegmentReadAfterCompactionAndRecovery.
/**
* Tests a non-trivial scenario in which ContainerKeyIndex may be tail-caching a stale version of a key if the
* following conditions occur:
* 1. StorageWriter processes values v0...vn for k1 and {@link WriterTableProcessor} indexes them.
* 2. As a result of {@link WriterTableProcessor} activity, the last value vn for k1 is moved to the tail of the Segment.
* 3. While TableCompactor works, a new PUT operation is appended to the Segment with new value vn+1 for k1.
* 4. At this point, the StorageWriter stops making progress and the container restarts without processing either the
* new value vn+1 or the compacted value vn for k1.
* 5. A subsequent restart will trigger the tail-caching from the last indexed offset, which points to vn+1.
* 6. The bug, which consists of the tail-caching process not taking table entry versions into account, would overwrite
* vn+1 with vn, just because the compacted copy of vn sits at a higher offset, having been written later in the Segment.
*/
@Test
public void testTableSegmentReadAfterCompactionAndRecovery() throws Exception {
@Cleanup TestContext context = new TestContext(DEFAULT_CONFIG, NO_TRUNCATIONS_DURABLE_LOG_CONFIG, DEFAULT_WRITER_CONFIG, null);
val durableLog = new AtomicReference<OperationLog>();
val durableLogFactory = new WatchableOperationLogFactory(context.operationLogFactory, durableLog::set);
// Data size and count to be written in this test.
int serializedEntryLength = 28;
int writtenEntries = 7;
@Cleanup StreamSegmentContainer container = new StreamSegmentContainer(CONTAINER_ID, DEFAULT_CONFIG, durableLogFactory, context.readIndexFactory, context.attributeIndexFactory, context.writerFactory, context.storageFactory, context.getDefaultExtensions(), executorService());
container.startAsync().awaitRunning();
Assert.assertNotNull(durableLog.get());
val tableStore = container.getExtension(ContainerTableExtension.class);
// 1. Create the Table Segment and get a DirectSegmentAccess to it to monitor its size.
String tableSegmentName = getSegmentName(0) + "_Table";
val type = SegmentType.builder(getSegmentType(tableSegmentName)).tableSegment().build();
tableStore.createSegment(tableSegmentName, type, TIMEOUT).join();
DirectSegmentAccess directTableSegment = container.forSegment(tableSegmentName, TIMEOUT).join();
// 2. Add some entries to the Table Segment. Note that we write multiple values to each key, so the TableCompactor
// can find entries to move to the tail.
final BiFunction<String, Integer, TableEntry> createTableEntry = (key, value) -> TableEntry.unversioned(new ByteArraySegment(key.getBytes()), new ByteArraySegment(String.format("Value_%s", value).getBytes()));
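// For example, createTableEntry.apply("key1", 1) produces an unversioned entry mapping key "key1" to value "Value_1".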
// 3. This callback will run when the StorageWriter writes data to Storage. At this point, the StorageWriter would
// have completed its first iteration, so it is time to add a new value for key1 while the TableCompactor is working.
val compactedEntry = List.of(TableEntry.versioned(new ByteArraySegment("key1".getBytes(StandardCharsets.UTF_8)), new ByteArraySegment("3".getBytes(StandardCharsets.UTF_8)), serializedEntryLength * 2L));
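// The explicit version (serializedEntryLength * 2L = 56) presumably corresponds to the offset at which the third 28-byte entry, (key1, 3), was originally written.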
// Simulate that the TableCompactor moves (key1, 3) to the tail of the Segment as a result of compacting the first 4 entries.
val compactedEntryUpdate = EntrySerializerTests.generateUpdateWithExplicitVersion(compactedEntry);
CompletableFuture<Void> callbackExecuted = new CompletableFuture<>();
context.storageFactory.getPostWriteCallback().set((segmentHandle, offset) -> {
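// A write to the Table Segment's attribute index ("$attributes.index") presumably means the WriterTableProcessor has indexed the initial entries, which is the window in which to inject the next two appends.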
if (segmentHandle.getSegmentName().contains("Segment_0_Table$attributes.index") && !callbackExecuted.isDone()) {
// New PUT with the newest value.
Futures.toVoid(tableStore.put(tableSegmentName, Collections.singletonList(createTableEntry.apply("key1", 4)), TIMEOUT)).join();
// Simulates a compacted entry append performed by Table Compactor.
directTableSegment.append(compactedEntryUpdate, null, TIMEOUT).join();
callbackExecuted.complete(null);
}
});
// Do the actual puts.
Futures.toVoid(tableStore.put(tableSegmentName, Collections.singletonList(createTableEntry.apply("key1", 1)), TIMEOUT)).join();
Futures.toVoid(tableStore.put(tableSegmentName, Collections.singletonList(createTableEntry.apply("key1", 2)), TIMEOUT)).join();
Futures.toVoid(tableStore.put(tableSegmentName, Collections.singletonList(createTableEntry.apply("key1", 3)), TIMEOUT)).join();
Futures.toVoid(tableStore.put(tableSegmentName, Collections.singletonList(createTableEntry.apply("key2", 1)), TIMEOUT)).join();
Futures.toVoid(tableStore.put(tableSegmentName, Collections.singletonList(createTableEntry.apply("key2", 2)), TIMEOUT)).join();
Futures.toVoid(tableStore.put(tableSegmentName, Collections.singletonList(createTableEntry.apply("key2", 3)), TIMEOUT)).join();
// 4. Above, the test does 7 puts, each one 28 bytes in size (6 directly, 1 via the callback). Now, we need
// to wait for the TableCompactor to write the entry (key1, 3) to the tail of the Segment.
callbackExecuted.join();
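// A Segment length beyond 7 * 28 = 196 bytes presumably indicates that the simulated compacted-entry append has also been applied.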
AssertExtensions.assertEventuallyEquals(true, () -> directTableSegment.getInfo().getLength() > (long) serializedEntryLength * writtenEntries, 5000);
// 5. The TableCompactor has moved the entry, so we immediately stop the container to prevent StorageWriter from
// making more progress.
container.close();
// 6. Create a new container instance that will recover from existing data.
@Cleanup val container2 = new StreamSegmentContainer(CONTAINER_ID, DEFAULT_CONFIG, durableLogFactory, context.readIndexFactory, context.attributeIndexFactory, context.writerFactory, context.storageFactory, context.getDefaultExtensions(), executorService());
container2.startAsync().awaitRunning();
// 7. Verify that (key1, 4) is the actual value after performing the tail-caching process, which now takes care
// of entry versions.
val expected = createTableEntry.apply("key1", 4);
val tableStore2 = container2.getExtension(ContainerTableExtension.class);
val actual = tableStore2.get(tableSegmentName, Collections.singletonList(expected.getKey().getKey()), TIMEOUT).get(TIMEOUT.toMillis(), TimeUnit.MILLISECONDS).get(0);
Assert.assertEquals(expected.getKey().getKey(), actual.getKey().getKey());
Assert.assertEquals(expected.getValue(), actual.getValue());
}
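For illustration, the following is a minimal, self-contained sketch of the version-aware tail-caching rule the Javadoc above describes. It is hypothetical and not Pravega's actual ContainerKeyIndex implementation: the TailCachingSketch class, its replay method, and the offsets 168 and 56 are assumptions chosen to mirror the test; it only models the rule that a replayed entry may replace a cached value for the same key when it carries a higher version (its original write offset).
import java.util.HashMap;
import java.util.Map;

// Hypothetical model of version-aware tail-caching; not Pravega's actual ContainerKeyIndex code.
public class TailCachingSketch {

    // A cached value together with the version it carries (the offset at which it was originally written).
    private static final class CachedValue {
        final long version;
        final String value;

        CachedValue(long version, String value) {
            this.version = version;
            this.value = value;
        }
    }

    private final Map<String, CachedValue> tailCache = new HashMap<>();

    // Replays one un-indexed entry; it only replaces the cached value if it carries a higher version.
    void replay(String key, long version, String value) {
        this.tailCache.merge(key, new CachedValue(version, value),
                (existing, candidate) -> candidate.version > existing.version ? candidate : existing);
    }

    String get(String key) {
        CachedValue cached = this.tailCache.get(key);
        return cached == null ? null : cached.value;
    }

    public static void main(String[] args) {
        TailCachingSketch cache = new TailCachingSketch();
        // The new PUT (key1, 4) is replayed first; its version is the (assumed) offset where it was appended.
        cache.replay("key1", 168, "Value_4");
        // The compacted copy of (key1, 3) sits at a higher segment offset but carries its original
        // version (assumed to be 56), so it must not overwrite the newer value.
        cache.replay("key1", 56, "3");
        System.out.println(cache.get("key1")); // prints "Value_4"
    }
}
The design point the test exercises is that a compacted copy preserves the original version of the value it moves (the explicit version on compactedEntry above), so a version comparison, rather than a segment-offset comparison, keeps (key1, 4) as the winning value after recovery.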