Search in sources :

Example 1 with KafkaChangelogSSPOffset

use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.

the class TestTaskStorageCommitManager method testPersistToFileSystemCheckpointV1AndV2Checkpoint.

/**
 * Exercises {@code writeCheckpointToStoreDirectories} first with a {@link CheckpointV1} and then with a
 * {@link CheckpointV2} against four mocked stores covering every durable / persisted-to-disk combination.
 *
 * Only "loggedPersistentStore" is mocked as both durable and persisted to disk, so it is the only store for
 * which offset files (CheckpointV1) and the checkpoint V2 file (CheckpointV2) are expected to be written.
 * The actual file writes are stubbed out on the spy / mock, so nothing touches the real file system.
 */
@Test
public void testPersistToFileSystemCheckpointV1AndV2Checkpoint() throws IOException {
    ContainerStorageManager containerStorageManager = mock(ContainerStorageManager.class);
    // "LP" store: durable (logged) and persisted to disk
    StorageEngine mockLPStore = mock(StorageEngine.class);
    StoreProperties lpStoreProps = mock(StoreProperties.class);
    when(mockLPStore.getStoreProperties()).thenReturn(lpStoreProps);
    when(lpStoreProps.isPersistedToDisk()).thenReturn(true);
    when(lpStoreProps.isDurableStore()).thenReturn(true);
    Path mockPath = mock(Path.class);
    when(mockLPStore.checkpoint(any())).thenReturn(Optional.of(mockPath));
    // "P" store: persisted to disk but not durable
    StorageEngine mockPStore = mock(StorageEngine.class);
    StoreProperties pStoreProps = mock(StoreProperties.class);
    when(mockPStore.getStoreProperties()).thenReturn(pStoreProps);
    when(pStoreProps.isPersistedToDisk()).thenReturn(true);
    when(pStoreProps.isDurableStore()).thenReturn(false);
    // "LI" store: durable but not persisted to disk
    StorageEngine mockLIStore = mock(StorageEngine.class);
    StoreProperties liStoreProps = mock(StoreProperties.class);
    when(mockLIStore.getStoreProperties()).thenReturn(liStoreProps);
    when(liStoreProps.isPersistedToDisk()).thenReturn(false);
    when(liStoreProps.isDurableStore()).thenReturn(true);
    // "I" store: neither durable nor persisted to disk
    StorageEngine mockIStore = mock(StorageEngine.class);
    StoreProperties iStoreProps = mock(StoreProperties.class);
    when(mockIStore.getStoreProperties()).thenReturn(iStoreProps);
    when(iStoreProps.isPersistedToDisk()).thenReturn(false);
    when(iStoreProps.isDurableStore()).thenReturn(false);
    Map<String, StorageEngine> taskStores = ImmutableMap.of("loggedPersistentStore", mockLPStore, "persistentStore", mockPStore, "loggedInMemStore", mockLIStore, "inMemStore", mockIStore);
    Partition changelogPartition = new Partition(0);
    SystemStream changelogSystemStream = new SystemStream("changelogSystem", "changelogStream");
    SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSystemStream, changelogPartition);
    Map<String, SystemStream> storeChangelogsStreams = ImmutableMap.of("loggedPersistentStore", changelogSystemStream, "loggedInMemStore", new SystemStream("system", "stream"));
    StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
    File durableStoreDir = new File("durableStorePath");
    // Resolve every task store directory to the durable base dir itself to keep the expected paths simple
    when(storageManagerUtil.getTaskStoreDir(eq(durableStoreDir), any(), any(), any())).thenReturn(durableStoreDir);
    TaskName taskName = new TaskName("task");
    TaskInstanceMetrics metrics = mock(TaskInstanceMetrics.class);
    Timer checkpointTimer = mock(Timer.class);
    when(metrics.storeCheckpointNs()).thenReturn(checkpointTimer);
    when(containerStorageManager.getAllStores(taskName)).thenReturn(taskStores);
    TaskStorageCommitManager commitManager = spy(new TaskStorageCommitManager(taskName, Collections.emptyMap(), containerStorageManager, storeChangelogsStreams, changelogPartition, null, null, ForkJoinPool.commonPool(), storageManagerUtil, durableStoreDir, metrics));
    // Stub out the actual OFFSET file write on the spy; the calls are only verified below
    doNothing().when(commitManager).writeChangelogOffsetFile(any(), any(), any(), any());
    // The store checkpoint dir is stubbed as "<store dir>-<checkpoint id>"
    when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class))).thenAnswer((Answer<String>) invocation -> {
        File file = invocation.getArgumentAt(0, File.class);
        CheckpointId checkpointId = invocation.getArgumentAt(1, CheckpointId.class);
        return file + "-" + checkpointId;
    });
    CheckpointId newCheckpointId = CheckpointId.create();
    String newestOffset = "1";
    KafkaChangelogSSPOffset kafkaChangelogSSPOffset = new KafkaChangelogSSPOffset(newCheckpointId, newestOffset);
    Map<SystemStreamPartition, String> offsetsJava = ImmutableMap.of(changelogSSP, kafkaChangelogSSPOffset.toString());
    commitManager.init();
    // invoke persist to file system for v1 checkpoint
    commitManager.writeCheckpointToStoreDirectories(new CheckpointV1(offsetsJava));
    verify(commitManager).writeChangelogOffsetFiles(offsetsJava);
    // writeChangelogOffsetFile is invoked twice for the logged persistent store:
    // once with the task store directory and once with the store checkpoint directory
    verify(commitManager).writeChangelogOffsetFile(eq("loggedPersistentStore"), eq(changelogSSP), eq(newestOffset), eq(durableStoreDir));
    File checkpointFile = Paths.get(storageManagerUtil.getStoreCheckpointDir(durableStoreDir, kafkaChangelogSSPOffset.getCheckpointId())).toFile();
    verify(commitManager).writeChangelogOffsetFile(eq("loggedPersistentStore"), eq(changelogSSP), eq(newestOffset), eq(checkpointFile));
    Map<String, String> storeSCM = ImmutableMap.of("loggedPersistentStore", "system;loggedPersistentStoreStream;1", "persistentStore", "system;persistentStoreStream;1", "loggedInMemStore", "system;loggedInMemStoreStream;1", "inMemStore", "system;inMemStoreStream;1");
    CheckpointV2 checkpoint = new CheckpointV2(newCheckpointId, Collections.emptyMap(), Collections.singletonMap("factory", storeSCM));
    // invoke persist to file system for v2 checkpoint
    commitManager.writeCheckpointToStoreDirectories(checkpoint);
    // Validate only durable and persisted stores are persisted
    // This should be invoked twice in total: once for the checkpointV1 call and once for the checkpointV2 call
    verify(storageManagerUtil, times(2)).getTaskStoreDir(eq(durableStoreDir), eq("loggedPersistentStore"), eq(taskName), any());
    File checkpointPath = Paths.get(storageManagerUtil.getStoreCheckpointDir(durableStoreDir, newCheckpointId)).toFile();
    verify(storageManagerUtil).writeCheckpointV2File(eq(checkpointPath), eq(checkpoint));
}
Also used : Path(java.nio.file.Path) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Mockito.spy(org.mockito.Mockito.spy) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) Answer(org.mockito.stubbing.Answer) Mockito.doThrow(org.mockito.Mockito.doThrow) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Assert.fail(org.junit.Assert.fail) Mockito.anyLong(org.mockito.Mockito.anyLong) Path(java.nio.file.Path) MapConfig(org.apache.samza.config.MapConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Timer(org.apache.samza.metrics.Timer) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Test(org.junit.Test) Mockito.times(org.mockito.Mockito.times) Mockito.doNothing(org.mockito.Mockito.doNothing) Mockito.when(org.mockito.Mockito.when) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) Mockito.verify(org.mockito.Mockito.verify) Matchers.any(org.mockito.Matchers.any) TaskMode(org.apache.samza.job.model.TaskMode) Mockito.never(org.mockito.Mockito.never) FileFilter(java.io.FileFilter) Paths(java.nio.file.Paths) ForkJoinPool(java.util.concurrent.ForkJoinPool) Optional(java.util.Optional) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) 
SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) SystemStream(org.apache.samza.system.SystemStream) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) Timer(org.apache.samza.metrics.Timer) TaskName(org.apache.samza.container.TaskName) CheckpointId(org.apache.samza.checkpoint.CheckpointId) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) File(java.io.File) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 2 with KafkaChangelogSSPOffset

use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.

the class TestTaskStorageCommitManager method testWriteChangelogOffsetFilesV2andV1.

/**
 * Verifies what {@code writeCheckpointToStoreDirectories} records for a single durable, persisted store when
 * given a {@link CheckpointV1}, then a {@link CheckpointV2}, then an updated {@link CheckpointV2}.
 *
 * File writes are redirected into two in-memory maps keyed by directory name, so the assertions can inspect
 * what would have been written to the task store directory and to the store checkpoint directory.
 */
@Test
public void testWriteChangelogOffsetFilesV2andV1() throws IOException {
    // directory name -> (changelog SSP -> offset) captured from writeChangelogOffsetFile calls
    Map<String, Map<SystemStreamPartition, String>> mockFileSystem = new HashMap<>();
    ContainerStorageManager containerStorageManager = mock(ContainerStorageManager.class);
    // directory name -> CheckpointV2 captured from writeCheckpointV2File calls
    Map<String, CheckpointV2> mockCheckpointFileSystem = new HashMap<>();
    // The only store in this test: durable and persisted to disk
    StorageEngine mockLPStore = mock(StorageEngine.class);
    StoreProperties lpStoreProps = mock(StoreProperties.class);
    when(mockLPStore.getStoreProperties()).thenReturn(lpStoreProps);
    when(lpStoreProps.isPersistedToDisk()).thenReturn(true);
    when(lpStoreProps.isDurableStore()).thenReturn(true);
    Path mockPath = mock(Path.class);
    when(mockLPStore.checkpoint(any())).thenReturn(Optional.of(mockPath));
    TaskInstanceMetrics metrics = mock(TaskInstanceMetrics.class);
    Timer checkpointTimer = mock(Timer.class);
    when(metrics.storeCheckpointNs()).thenReturn(checkpointTimer);
    java.util.Map<String, StorageEngine> taskStores = ImmutableMap.of("loggedPersistentStore", mockLPStore);
    Partition changelogPartition = new Partition(0);
    SystemStream changelogSystemStream = new SystemStream("changelogSystem", "changelogStream");
    SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSystemStream, changelogPartition);
    java.util.Map<String, SystemStream> storeChangelogsStreams = ImmutableMap.of("loggedPersistentStore", changelogSystemStream);
    StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
    File tmpTestPath = new File("store-checkpoint-test");
    // Resolve the store's task directory to the base test path itself
    when(storageManagerUtil.getTaskStoreDir(eq(tmpTestPath), eq("loggedPersistentStore"), any(), any())).thenReturn(tmpTestPath);
    TaskName taskName = new TaskName("task");
    when(containerStorageManager.getAllStores(taskName)).thenReturn(taskStores);
    TaskStorageCommitManager commitManager = spy(new TaskStorageCommitManager(taskName, Collections.emptyMap(), containerStorageManager, storeChangelogsStreams, changelogPartition, null, null, ForkJoinPool.commonPool(), storageManagerUtil, tmpTestPath, metrics));
    // Replace the real OFFSET file write with a recording into mockFileSystem, keyed by the target dir name
    doAnswer(invocation -> {
        String fileDir = invocation.getArgumentAt(3, File.class).getName();
        SystemStreamPartition ssp = invocation.getArgumentAt(1, SystemStreamPartition.class);
        String offset = invocation.getArgumentAt(2, String.class);
        if (mockFileSystem.containsKey(fileDir)) {
            mockFileSystem.get(fileDir).put(ssp, offset);
        } else {
            Map<SystemStreamPartition, String> sspOffsets = new HashMap<>();
            sspOffsets.put(ssp, offset);
            mockFileSystem.put(fileDir, sspOffsets);
        }
        return null;
    }).when(commitManager).writeChangelogOffsetFile(any(), any(), any(), any());
    // Replace the checkpoint V2 file write with a recording into mockCheckpointFileSystem (last write wins)
    doAnswer(invocation -> {
        String storeDir = invocation.getArgumentAt(0, File.class).getName();
        CheckpointV2 checkpointV2 = invocation.getArgumentAt(1, CheckpointV2.class);
        mockCheckpointFileSystem.put(storeDir, checkpointV2);
        return null;
    }).when(storageManagerUtil).writeCheckpointV2File(any(), any());
    // The store checkpoint dir is stubbed as "<store dir>-<checkpoint id>"
    when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class))).thenAnswer((Answer<String>) invocation -> {
        File file = invocation.getArgumentAt(0, File.class);
        CheckpointId checkpointId = invocation.getArgumentAt(1, CheckpointId.class);
        return file + "-" + checkpointId;
    });
    CheckpointId newCheckpointId = CheckpointId.create();
    String newestOffset = "1";
    KafkaChangelogSSPOffset kafkaChangelogSSPOffset = new KafkaChangelogSSPOffset(newCheckpointId, newestOffset);
    java.util.Map<SystemStreamPartition, String> offsetsJava = ImmutableMap.of(changelogSSP, kafkaChangelogSSPOffset.toString());
    commitManager.init();
    // invoke persist to file system for v1 checkpoint
    commitManager.writeCheckpointToStoreDirectories(new CheckpointV1(offsetsJava));
    // Two directories are expected to have received an offset: the store checkpoint dir and the task store dir
    assertEquals(2, mockFileSystem.size());
    // check if v2 offsets are written correctly
    String v2FilePath = storageManagerUtil.getStoreCheckpointDir(tmpTestPath, newCheckpointId);
    assertTrue(mockFileSystem.containsKey(v2FilePath));
    assertTrue(mockFileSystem.get(v2FilePath).containsKey(changelogSSP));
    assertEquals(1, mockFileSystem.get(v2FilePath).size());
    assertEquals(newestOffset, mockFileSystem.get(v2FilePath).get(changelogSSP));
    // check if v1 offsets are written correctly
    String v1FilePath = tmpTestPath.getPath();
    assertTrue(mockFileSystem.containsKey(v1FilePath));
    assertTrue(mockFileSystem.get(v1FilePath).containsKey(changelogSSP));
    assertEquals(1, mockFileSystem.get(v1FilePath).size());
    assertEquals(newestOffset, mockFileSystem.get(v1FilePath).get(changelogSSP));
    java.util.Map<String, String> storeSCM = ImmutableMap.of("loggedPersistentStore", "system;loggedPersistentStoreStream;1", "persistentStore", "system;persistentStoreStream;1", "loggedInMemStore", "system;loggedInMemStoreStream;1", "inMemStore", "system;inMemStoreStream;1");
    CheckpointV2 checkpoint = new CheckpointV2(newCheckpointId, Collections.emptyMap(), Collections.singletonMap("factory", storeSCM));
    // invoke persist to file system with checkpoint v2
    commitManager.writeCheckpointToStoreDirectories(checkpoint);
    // The same CheckpointV2 is expected in both the store checkpoint dir and the task store dir
    assertTrue(mockCheckpointFileSystem.containsKey(v2FilePath));
    assertEquals(checkpoint, mockCheckpointFileSystem.get(v2FilePath));
    assertTrue(mockCheckpointFileSystem.containsKey(v1FilePath));
    assertEquals(checkpoint, mockCheckpointFileSystem.get(v1FilePath));
    assertEquals(2, mockCheckpointFileSystem.size());
    // Writing a checkpoint with the same id but different input offsets must replace both recorded entries
    // without adding new directories
    CheckpointV2 updatedCheckpoint = new CheckpointV2(newCheckpointId, ImmutableMap.of(new SystemStreamPartition("inputSystem", "inputStream", changelogPartition), "5"), Collections.singletonMap("factory", storeSCM));
    commitManager.writeCheckpointToStoreDirectories(updatedCheckpoint);
    assertEquals(updatedCheckpoint, mockCheckpointFileSystem.get(v2FilePath));
    assertEquals(updatedCheckpoint, mockCheckpointFileSystem.get(v1FilePath));
    assertEquals(2, mockCheckpointFileSystem.size());
}
Also used : CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Mockito.spy(org.mockito.Mockito.spy) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) Answer(org.mockito.stubbing.Answer) Mockito.doThrow(org.mockito.Mockito.doThrow) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Assert.fail(org.junit.Assert.fail) Mockito.anyLong(org.mockito.Mockito.anyLong) Path(java.nio.file.Path) MapConfig(org.apache.samza.config.MapConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Timer(org.apache.samza.metrics.Timer) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Test(org.junit.Test) Mockito.times(org.mockito.Mockito.times) Mockito.doNothing(org.mockito.Mockito.doNothing) Mockito.when(org.mockito.Mockito.when) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) Mockito.verify(org.mockito.Mockito.verify) Matchers.any(org.mockito.Matchers.any) TaskMode(org.apache.samza.job.model.TaskMode) Mockito.never(org.mockito.Mockito.never) FileFilter(java.io.FileFilter) Paths(java.nio.file.Paths) ForkJoinPool(java.util.concurrent.ForkJoinPool) Optional(java.util.Optional) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) HashMap(java.util.HashMap) 
CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) Path(java.nio.file.Path) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) SystemStream(org.apache.samza.system.SystemStream) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) Timer(org.apache.samza.metrics.Timer) TaskName(org.apache.samza.container.TaskName) CheckpointId(org.apache.samza.checkpoint.CheckpointId) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) File(java.io.File) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 3 with KafkaChangelogSSPOffset

use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.

the class TransactionalStateTaskRestoreManager method getCheckpointId.

/**
 * Extracts the checkpoint id associated with the given checkpoint.
 *
 * For a {@link CheckpointV1}, the store changelogs are scanned in iteration order and the id is parsed from the
 * first non-blank offset found for a store's changelog SSP. For a {@link CheckpointV2}, the checkpoint's own id
 * is returned.
 *
 * @param checkpoint the checkpoint to inspect; may be null
 * @return the checkpoint id, or null when {@code checkpoint} is null or when a CheckpointV1 carries no non-blank
 *         offset for any store changelog SSP
 * @throws SamzaException if the checkpoint is neither a CheckpointV1 nor a CheckpointV2
 */
private CheckpointId getCheckpointId(Checkpoint checkpoint) {
    if (checkpoint == null) {
        return null;
    }

    if (checkpoint instanceof CheckpointV1) {
        // Only the changelog system streams matter here; the store names are not needed.
        for (SystemStream changelogStream : storeChangelogs.values()) {
            SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogStream, taskModel.getChangelogPartition());
            String serializedOffset = checkpoint.getOffsets().get(changelogSSP);
            if (StringUtils.isNotBlank(serializedOffset)) {
                return KafkaChangelogSSPOffset.fromString(serializedOffset).getCheckpointId();
            }
        }
        // No store changelog had a usable offset in this checkpoint.
        return null;
    }

    if (checkpoint instanceof CheckpointV2) {
        return ((CheckpointV2) checkpoint).getCheckpointId();
    }

    throw new SamzaException("Unsupported checkpoint version: " + checkpoint.getVersion());
}
Also used : CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) SystemStream(org.apache.samza.system.SystemStream) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) SamzaException(org.apache.samza.SamzaException) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 4 with KafkaChangelogSSPOffset

use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.

the class TransactionalStateTaskRestoreManager method getCheckpointedChangelogOffsets.

/**
 * Collects, per store name, the Kafka state checkpoint marker recorded in the given checkpoint.
 *
 * For a {@link CheckpointV2}, the markers registered under
 * {@code KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME} are deserialized; markers registered under
 * any other factory name are ignored. For a {@link CheckpointV1}, the changelog SSP of each store is looked up in
 * the checkpoint's offsets and, when a non-blank value is present, converted into a marker.
 *
 * @param checkpoint the checkpoint to read; may be null
 * @return a new mutable map from store name to marker; empty when {@code checkpoint} is null or holds no
 *         matching entries
 * @throws SamzaException if the checkpoint is neither a CheckpointV1 nor a CheckpointV2
 */
private Map<String, KafkaStateCheckpointMarker> getCheckpointedChangelogOffsets(Checkpoint checkpoint) {
    Map<String, KafkaStateCheckpointMarker> markersByStore = new HashMap<>();
    if (checkpoint == null) {
        return markersByStore;
    }

    if (checkpoint instanceof CheckpointV2) {
        Map<String, Map<String, String>> scmsByFactory = ((CheckpointV2) checkpoint).getStateCheckpointMarkers();
        // Only the Kafka state backend markers are relevant; skip the non-KafkaStateCheckpointMarkers.
        if (scmsByFactory.containsKey(KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME)) {
            Map<String, String> kafkaScms = scmsByFactory.get(KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME);
            for (Map.Entry<String, String> storeScm : kafkaScms.entrySet()) {
                markersByStore.put(storeScm.getKey(), KafkaStateCheckpointMarker.deserialize(storeScm.getValue()));
            }
        }
        return markersByStore;
    }

    if (checkpoint instanceof CheckpointV1) {
        // With checkpoint v1 the store offsets live among the checkpointed offsets, keyed by the changelog SSP,
        // so each store's changelog SSP has to be looked up there.
        Map<SystemStreamPartition, String> offsetsBySSP = checkpoint.getOffsets();
        for (Map.Entry<String, SystemStream> storeChangelog : storeChangelogs.entrySet()) {
            Partition changelogPartition = taskModel.getChangelogPartition();
            SystemStreamPartition changelogSSP = new SystemStreamPartition(storeChangelog.getValue(), changelogPartition);
            String serializedOffset = offsetsBySSP.get(changelogSSP);
            if (StringUtils.isNotBlank(serializedOffset)) {
                String changelogOffset = KafkaChangelogSSPOffset.fromString(serializedOffset).getChangelogOffset();
                markersByStore.put(storeChangelog.getKey(), new KafkaStateCheckpointMarker(changelogSSP, changelogOffset));
            }
        }
        return markersByStore;
    }

    throw new SamzaException("Unsupported checkpoint version: " + checkpoint.getVersion());
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) ListMultimap(com.google.common.collect.ListMultimap) SSPMetadataCache(org.apache.samza.system.SSPMetadataCache) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) FileUtil(org.apache.samza.util.FileUtil) SystemConsumer(org.apache.samza.system.SystemConsumer) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) ExecutorService(java.util.concurrent.ExecutorService) StorageConfig(org.apache.samza.config.StorageConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) Partition(org.apache.samza.Partition) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Clock(org.apache.samza.util.Clock) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ChangelogSSPIterator(org.apache.samza.system.ChangelogSSPIterator) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) Optional(java.util.Optional) 
Preconditions(com.google.common.base.Preconditions) KafkaStateCheckpointMarker(org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Config(org.apache.samza.config.Config) Collections(java.util.Collections) SystemAdmins(org.apache.samza.system.SystemAdmins) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) SamzaException(org.apache.samza.SamzaException) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) KafkaStateCheckpointMarker(org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 5 with KafkaChangelogSSPOffset

use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.

the class TaskStorageCommitManager method writeChangelogOffsetFiles.

/**
 * Persists the checkpointed changelog offset of every eligible store to OFFSET files.
 *
 * A store is eligible when its changelog SSP is present in {@code checkpointOffsets}, a storage engine is
 * registered for it, and that engine is both durable and persisted to disk. For each eligible store the
 * checkpointed value is parsed as a {@link KafkaChangelogSSPOffset}:
 * <ul>
 *   <li>if the parsed changelog offset is non-null, it is written to the store's current task store directory
 *       and to the store checkpoint directory derived from the parsed checkpoint id;</li>
 *   <li>if it is null, the OFFSET file in the current task store directory is deleted instead.</li>
 * </ul>
 *
 * @param checkpointOffsets checkpointed values keyed by SSP; entries for non-changelog SSPs are ignored
 * @throws SamzaException if the storage engines have not been initialized, or if writing or deleting an
 *         OFFSET file fails with an {@link IOException}
 */
@VisibleForTesting
void writeChangelogOffsetFiles(Map<SystemStreamPartition, String> checkpointOffsets) {
    if (storageEngines == null) {
        throw new SamzaException(String.format("Storage engines are not initialized and writeChangelogOffsetFiles not be written for task %s", taskName));
    }

    storeChangelogs.forEach((storeName, systemStream) -> {
        SystemStreamPartition changelogSSP = new SystemStreamPartition(systemStream.getSystem(), systemStream.getStream(), taskChangelogPartition);

        // Skip stores without a checkpointed offset or without a registered engine.
        if (!checkpointOffsets.containsKey(changelogSSP)) {
            return;
        }
        if (!storageEngines.containsKey(storeName)) {
            return;
        }
        // Only write if the store is durable and persisted to disk.
        if (!storageEngines.get(storeName).getStoreProperties().isDurableStore()) {
            return;
        }
        if (!storageEngines.get(storeName).getStoreProperties().isPersistedToDisk()) {
            return;
        }

        LOG.debug("Writing changelog offset for taskName {} store {} changelog {}.", taskName, storeName, systemStream);
        File currentStoreDir = storageManagerUtil.getTaskStoreDir(durableStoreBaseDir, storeName, taskName, TaskMode.Active);
        try {
            KafkaChangelogSSPOffset parsedOffset = KafkaChangelogSSPOffset.fromString(checkpointOffsets.get(changelogSSP));
            String newestOffset = parsedOffset.getChangelogOffset();
            if (newestOffset == null) {
                // A null newestOffset means the changelog ssp is (or has become) empty, e.g. because the
                // changelog topic was newly added, repartitioned, or manually deleted and recreated.
                // There is nothing to persist, so remove the OFFSET file.
                LOG.debug("Deleting OFFSET file for taskName {} store {} changelog ssp {} since the newestOffset is null.", taskName, storeName, changelogSSP);
                storageManagerUtil.deleteOffsetFile(currentStoreDir);
            } else {
                // OFFSET file in the task store directory.
                writeChangelogOffsetFile(storeName, changelogSSP, newestOffset, currentStoreDir);
                // OFFSET file in the store checkpoint directory.
                String checkpointDirPath = storageManagerUtil.getStoreCheckpointDir(currentStoreDir, parsedOffset.getCheckpointId());
                File checkpointDir = Paths.get(checkpointDirPath).toFile();
                writeChangelogOffsetFile(storeName, changelogSSP, newestOffset, checkpointDir);
            }
        } catch (IOException e) {
            throw new SamzaException(String.format("Error storing offset for taskName %s store %s changelog %s.", taskName, storeName, systemStream), e);
        }
    });

    LOG.debug("Done writing OFFSET files for logged persistent key value stores for task {}", taskName);
}
Also used : KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) File(java.io.File) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

KafkaChangelogSSPOffset (org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset)7 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 File (java.io.File)6 HashMap (java.util.HashMap)6 Map (java.util.Map)6 SamzaException (org.apache.samza.SamzaException)6 CheckpointV1 (org.apache.samza.checkpoint.CheckpointV1)6 SystemStream (org.apache.samza.system.SystemStream)6 Partition (org.apache.samza.Partition)5 CheckpointId (org.apache.samza.checkpoint.CheckpointId)5 CheckpointV2 (org.apache.samza.checkpoint.CheckpointV2)5 TaskName (org.apache.samza.container.TaskName)5 IOException (java.io.IOException)4 Path (java.nio.file.Path)4 Collections (java.util.Collections)4 Optional (java.util.Optional)4 CompletableFuture (java.util.concurrent.CompletableFuture)4 Checkpoint (org.apache.samza.checkpoint.Checkpoint)4 TaskInstanceMetrics (org.apache.samza.container.TaskInstanceMetrics)4