use of org.apache.samza.checkpoint.CheckpointManager in project samza by apache.
the class TestTaskStorageCommitManager method testCommitManagerStart.
@Test
public void testCommitManagerStart() {
  CheckpointManager checkpointManager = mock(CheckpointManager.class);
  TaskBackupManager taskBackupManager1 = mock(TaskBackupManager.class);
  TaskBackupManager taskBackupManager2 = mock(TaskBackupManager.class);
  ContainerStorageManager containerStorageManager = mock(ContainerStorageManager.class);
  Checkpoint checkpoint = mock(Checkpoint.class);
  TaskName taskName = new TaskName("task1");
  Map<String, TaskBackupManager> backupManagers =
      ImmutableMap.of("factory1", taskBackupManager1, "factory2", taskBackupManager2);
  TaskStorageCommitManager cm = new TaskStorageCommitManager(taskName, backupManagers, containerStorageManager,
      Collections.emptyMap(), new Partition(1), checkpointManager, new MapConfig(), ForkJoinPool.commonPool(),
      new StorageManagerUtil(), null, null);
  when(checkpointManager.readLastCheckpoint(taskName)).thenReturn(checkpoint);
  cm.init();
  verify(taskBackupManager1).init(eq(checkpoint));
  verify(taskBackupManager2).init(eq(checkpoint));
}
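The test above injects a mocked CheckpointManager, so the only interaction it exercises is readLastCheckpoint feeding the last checkpoint into each TaskBackupManager.init call. For orientation, here is a minimal sketch of the lifecycle a real manager goes through; it assumes the register/start/readLastCheckpoint/stop methods of org.apache.samza.checkpoint.CheckpointManager, and the class and method names are hypothetical.

import org.apache.samza.checkpoint.Checkpoint;
import org.apache.samza.checkpoint.CheckpointManager;
import org.apache.samza.container.TaskName;

public class CheckpointManagerLifecycleSketch {
  public static Checkpoint readLast(CheckpointManager checkpointManager, TaskName taskName) {
    checkpointManager.register(taskName);   // tasks are registered before the manager is started
    checkpointManager.start();              // e.g. creates/validates the underlying checkpoint stream
    Checkpoint lastCheckpoint = checkpointManager.readLastCheckpoint(taskName); // may be null on a first run
    checkpointManager.stop();
    return lastCheckpoint;
  }
}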
use of org.apache.samza.checkpoint.CheckpointManager in project samza by apache.
the class StreamManager method clearStreamsFromPreviousRun.
/**
 * This is a best-effort approach to clear the internal streams from the previous run, including intermediate streams,
 * the checkpoint stream and changelog streams.
 * For batch processing, we always clean up the previous internal streams and create a new set for each run.
 * @param prevConfig config of the previous run
 */
public void clearStreamsFromPreviousRun(Config prevConfig) {
  try {
    ApplicationConfig appConfig = new ApplicationConfig(prevConfig);
    LOGGER.info("run.id from previous run is {}", appConfig.getRunId());
    StreamConfig streamConfig = new StreamConfig(prevConfig);
    // Find all intermediate streams and clean them up
    Set<StreamSpec> intStreams = streamConfig.getStreamIds().stream()
        .filter(streamConfig::getIsIntermediateStream)
        .map(id -> new StreamSpec(id, streamConfig.getPhysicalName(id), streamConfig.getSystem(id)))
        .collect(Collectors.toSet());
    intStreams.forEach(stream -> {
      LOGGER.info("Clear intermediate stream {} in system {}", stream.getPhysicalName(), stream.getSystemName());
      systemAdmins.getSystemAdmin(stream.getSystemName()).clearStream(stream);
    });
    // Find the checkpoint stream and clean it up
    TaskConfig taskConfig = new TaskConfig(prevConfig);
    taskConfig.getCheckpointManager(new MetricsRegistryMap()).ifPresent(CheckpointManager::clearCheckpoints);
    // Find changelog streams and remove them
    StorageConfig storageConfig = new StorageConfig(prevConfig);
    for (String store : storageConfig.getStoreNames()) {
      String changelog = storageConfig.getChangelogStream(store).orElse(null);
      if (changelog != null) {
        LOGGER.info("Clear store {} changelog {}", store, changelog);
        SystemStream systemStream = StreamUtil.getSystemStreamFromNames(changelog);
        StreamSpec spec = StreamSpec.createChangeLogStreamSpec(systemStream.getStream(), systemStream.getSystem(), 1);
        systemAdmins.getSystemAdmin(spec.getSystemName()).clearStream(spec);
      }
    }
  } catch (Exception e) {
    // For batch, we always create a new set of internal streams (checkpoint, changelog and intermediate) with a
    // unique id, so if clearStream fails it won't affect the correctness of the results.
    // We log a warning here and rely on retention to clean up the streams later.
    LOGGER.warn("Failed to clear internal streams from the previous run. Please clean them up manually.", e);
  }
}
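The checkpoint-clearing step is the only place this method touches CheckpointManager directly. A minimal, self-contained sketch of just that step, assuming only the TaskConfig.getCheckpointManager accessor and the CheckpointManager::clearCheckpoints call used above (the surrounding class name is hypothetical):

import java.util.Optional;
import org.apache.samza.checkpoint.CheckpointManager;
import org.apache.samza.config.Config;
import org.apache.samza.config.TaskConfig;
import org.apache.samza.metrics.MetricsRegistryMap;

public class CheckpointCleanupSketch {
  /** Best-effort removal of the checkpoint data left by a previous batch run. */
  public static void clearCheckpointsFromPreviousRun(Config prevConfig) {
    Optional<CheckpointManager> checkpointManager =
        new TaskConfig(prevConfig).getCheckpointManager(new MetricsRegistryMap());
    try {
      checkpointManager.ifPresent(CheckpointManager::clearCheckpoints);
    } catch (Exception e) {
      // Same best-effort contract as clearStreamsFromPreviousRun: rely on retention to reclaim the stream later.
    }
  }
}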
use of org.apache.samza.checkpoint.CheckpointManager in project samza by apache.
the class StorageRecovery method getContainerStorageManagers.
/**
 * Create one ContainerStorageManager per container model and add them all to the
 * containerStorageManagers map.
 */
@SuppressWarnings("rawtypes")
private void getContainerStorageManagers() {
  Set<String> factoryClasses = new StorageConfig(jobConfig).getRestoreFactories();
  Map<String, StateBackendFactory> stateBackendFactories = factoryClasses.stream()
      .collect(Collectors.toMap(factoryClass -> factoryClass,
          factoryClass -> ReflectionUtil.getObj(factoryClass, StateBackendFactory.class)));
  Clock clock = SystemClock.instance();
  StreamMetadataCache streamMetadataCache = new StreamMetadataCache(systemAdmins, 5000, clock);
  // Don't worry about prefetching for this; it looks like the tool doesn't flush to offset files anyway.
  Map<String, SystemFactory> systemFactories = new SystemConfig(jobConfig).getSystemFactories();
  CheckpointManager checkpointManager =
      new TaskConfig(jobConfig).getCheckpointManager(new MetricsRegistryMap()).orElse(null);
  for (ContainerModel containerModel : containers.values()) {
    ContainerContext containerContext = new ContainerContextImpl(containerModel, new MetricsRegistryMap());
    ContainerStorageManager containerStorageManager =
        new ContainerStorageManager(checkpointManager, containerModel, streamMetadataCache, systemAdmins,
            changeLogSystemStreams, new HashMap<>(), storageEngineFactories, systemFactories, this.getSerdes(),
            jobConfig, new HashMap<>(),
            new SamzaContainerMetrics(containerModel.getId(), new MetricsRegistryMap(), ""),
            JobContextImpl.fromConfigWithDefaults(jobConfig, jobModel), containerContext, stateBackendFactories,
            new HashMap<>(), storeBaseDir, storeBaseDir, null, new SystemClock());
    this.containerStorageManagers.put(containerModel.getId(), containerStorageManager);
  }
}
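This method only constructs the per-container ContainerStorageManagers; how the recovery tool then drives them is not shown here. A plausible sketch, assuming only the start() and shutdown() methods that the tests below also call (class and method names are hypothetical):

import java.util.Map;
import org.apache.samza.storage.ContainerStorageManager;

public class StorageRecoveryRunSketch {
  /** Restores state for every container by starting and then shutting down each ContainerStorageManager. */
  public static void restoreAll(Map<String, ContainerStorageManager> containerStorageManagers)
      throws InterruptedException {
    for (Map.Entry<String, ContainerStorageManager> entry : containerStorageManagers.entrySet()) {
      ContainerStorageManager csm = entry.getValue();
      csm.start();     // restores all stores for the container's tasks
      csm.shutdown();  // releases consumers and other resources once restore completes
    }
  }
}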
use of org.apache.samza.checkpoint.CheckpointManager in project samza by apache.
the class TestContainerStorageManager method testNoConfiguredDurableStores.
@Test
public void testNoConfiguredDurableStores() throws InterruptedException {
  taskRestoreMetricGauges = new HashMap<>();
  this.tasks = new HashMap<>();
  this.taskInstanceMetrics = new HashMap<>();
  // Add two mocked tasks
  addMockedTask("task 0", 0);
  addMockedTask("task 1", 1);
  // Mock container metrics
  samzaContainerMetrics = mock(SamzaContainerMetrics.class);
  when(samzaContainerMetrics.taskStoreRestorationMetrics()).thenReturn(taskRestoreMetricGauges);
  // Create mocked configs for specifying serdes
  Map<String, String> configMap = new HashMap<>();
  configMap.put("serializers.registry.stringserde.class", StringSerdeFactory.class.getName());
  configMap.put(TaskConfig.TRANSACTIONAL_STATE_RETAIN_EXISTING_STATE, "true");
  Config config = new MapConfig(configMap);
  Map<String, Serde<Object>> serdes = new HashMap<>();
  serdes.put("stringserde", mock(Serde.class));
  CheckpointManager checkpointManager = mock(CheckpointManager.class);
  when(checkpointManager.readLastCheckpoint(any(TaskName.class))).thenReturn(new CheckpointV1(new HashMap<>()));
  ContainerContext mockContainerContext = mock(ContainerContext.class);
  ContainerModel mockContainerModel = new ContainerModel("samza-container-test", tasks);
  when(mockContainerContext.getContainerModel()).thenReturn(mockContainerModel);
  // Reset the expected number of sysConsumer create, start and stop calls, and store.restore() calls
  this.systemConsumerCreationCount = 0;
  this.systemConsumerStartCount = 0;
  this.systemConsumerStopCount = 0;
  this.storeRestoreCallCount = 0;
  StateBackendFactory backendFactory = mock(StateBackendFactory.class);
  TaskRestoreManager restoreManager = mock(TaskRestoreManager.class);
  when(backendFactory.getRestoreManager(any(), any(), any(), any(), any(), any(), any(), any(), any(), any(), any()))
      .thenReturn(restoreManager);
  doAnswer(invocation -> {
    storeRestoreCallCount++;
    return CompletableFuture.completedFuture(null);
  }).when(restoreManager).restore();
  // Create the container storage manager
  ContainerStorageManager containerStorageManager =
      new ContainerStorageManager(checkpointManager, mockContainerModel, mock(StreamMetadataCache.class),
          mock(SystemAdmins.class), new HashMap<>(), new HashMap<>(), new HashMap<>(), new HashMap<>(), serdes,
          config, taskInstanceMetrics, samzaContainerMetrics, mock(JobContext.class), mockContainerContext,
          new HashMap<>(), mock(Map.class), DEFAULT_LOGGED_STORE_BASE_DIR, DEFAULT_STORE_BASE_DIR, null,
          new SystemClock());
  containerStorageManager.start();
  containerStorageManager.shutdown();
  for (Gauge gauge : taskRestoreMetricGauges.values()) {
    Assert.assertTrue("Restoration time gauge should never be invoked",
        mockingDetails(gauge).getInvocations().size() == 0);
  }
  Assert.assertEquals("Store restore count should be 0 because there are no configured durable stores", 0,
      this.storeRestoreCallCount);
  Assert.assertEquals(0, this.systemConsumerCreationCount);
  Assert.assertEquals(0, this.systemConsumerStopCount);
  Assert.assertEquals(0, this.systemConsumerStartCount);
}
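The gauge check above counts recorded invocations via mockingDetails. An equivalent, more direct way to assert that a mock was never touched is Mockito's verifyNoInteractions (Mockito 3.x; older versions offer verifyZeroInteractions). A small stand-alone sketch, not part of the Samza test, showing both styles side by side:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.mockingDetails;
import static org.mockito.Mockito.verifyNoInteractions;

import org.apache.samza.metrics.Gauge;
import org.junit.Assert;
import org.junit.Test;

public class NoInteractionSketchTest {
  @Test
  public void gaugeIsNeverTouched() {
    Gauge<Long> gauge = mock(Gauge.class);
    // Style used in the Samza test: inspect the mock's recorded invocations directly.
    Assert.assertEquals(0, mockingDetails(gauge).getInvocations().size());
    // Equivalent Mockito shorthand: fails if any method on the mock was called.
    verifyNoInteractions(gauge);
  }
}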
use of org.apache.samza.checkpoint.CheckpointManager in project samza by apache.
the class TestContainerStorageManager method setUp.
/**
 * Method to create a containerStorageManager with mocked dependencies
 */
@Before
public void setUp() throws InterruptedException {
  taskRestoreMetricGauges = new HashMap<>();
  this.tasks = new HashMap<>();
  this.taskInstanceMetrics = new HashMap<>();
  // Add two mocked tasks
  addMockedTask("task 0", 0);
  addMockedTask("task 1", 1);
  // Mock container metrics
  samzaContainerMetrics = mock(SamzaContainerMetrics.class);
  when(samzaContainerMetrics.taskStoreRestorationMetrics()).thenReturn(taskRestoreMetricGauges);
  // Create a map of test changelog system streams
  Map<String, SystemStream> changelogSystemStreams = new HashMap<>();
  changelogSystemStreams.put(STORE_NAME, new SystemStream(SYSTEM_NAME, STREAM_NAME));
  // Create mocked storage engine factories
  Map<String, StorageEngineFactory<Object, Object>> storageEngineFactories = new HashMap<>();
  StorageEngineFactory mockStorageEngineFactory =
      (StorageEngineFactory<Object, Object>) mock(StorageEngineFactory.class);
  StorageEngine mockStorageEngine = mock(StorageEngine.class);
  when(mockStorageEngine.getStoreProperties())
      .thenReturn(new StoreProperties.StorePropertiesBuilder().setLoggedStore(true).setPersistedToDisk(true).build());
  doAnswer(invocation -> mockStorageEngine).when(mockStorageEngineFactory)
      .getStorageEngine(anyString(), any(), any(), any(), any(), any(), any(), any(), any(), any());
  storageEngineFactories.put(STORE_NAME, mockStorageEngineFactory);
  // Add instrumentation to the mocked storage engine, to record the number of store.restore() calls
  doAnswer(invocation -> {
    storeRestoreCallCount++;
    return CompletableFuture.completedFuture(null);
  }).when(mockStorageEngine).restore(any());
  // Set the mocked stores' properties to be logged
  doAnswer(invocation -> new StoreProperties.StorePropertiesBuilder().setLoggedStore(true).build())
      .when(mockStorageEngine).getStoreProperties();
  // Mock and set up system consumers
  SystemConsumer mockSystemConsumer = mock(SystemConsumer.class);
  doAnswer(invocation -> {
    systemConsumerStartCount++;
    return null;
  }).when(mockSystemConsumer).start();
  doAnswer(invocation -> {
    systemConsumerStopCount++;
    return null;
  }).when(mockSystemConsumer).stop();
  // Create mocked system factories
  Map<String, SystemFactory> systemFactories = new HashMap<>();
  // Count the number of system consumers created
  SystemFactory mockSystemFactory = mock(SystemFactory.class);
  doAnswer(invocation -> {
    this.systemConsumerCreationCount++;
    return mockSystemConsumer;
  }).when(mockSystemFactory).getConsumer(anyString(), any(), any());
  systemFactories.put(SYSTEM_NAME, mockSystemFactory);
  // Create mocked configs for specifying serdes
  Map<String, String> configMap = new HashMap<>();
  configMap.put("stores." + STORE_NAME + ".key.serde", "stringserde");
  configMap.put("stores." + STORE_NAME + ".msg.serde", "stringserde");
  configMap.put("stores." + STORE_NAME + ".factory", mockStorageEngineFactory.getClass().getName());
  configMap.put("stores." + STORE_NAME + ".changelog", SYSTEM_NAME + "." + STREAM_NAME);
  configMap.put("serializers.registry.stringserde.class", StringSerdeFactory.class.getName());
  configMap.put(TaskConfig.TRANSACTIONAL_STATE_RETAIN_EXISTING_STATE, "true");
  Config config = new MapConfig(configMap);
  Map<String, Serde<Object>> serdes = new HashMap<>();
  serdes.put("stringserde", mock(Serde.class));
  // Create mocked system admins
  SystemAdmin mockSystemAdmin = mock(SystemAdmin.class);
  doAnswer(new Answer<Void>() {
    public Void answer(InvocationOnMock invocation) {
      Object[] args = invocation.getArguments();
      System.out.println("called with arguments: " + Arrays.toString(args));
      return null;
    }
  }).when(mockSystemAdmin).validateStream(any());
  SystemAdmins mockSystemAdmins = mock(SystemAdmins.class);
  when(mockSystemAdmins.getSystemAdmin("kafka")).thenReturn(mockSystemAdmin);
  // Create a mocked StreamMetadataCache
  SystemStreamMetadata.SystemStreamPartitionMetadata sspMetadata =
      new SystemStreamMetadata.SystemStreamPartitionMetadata("0", "50", "51");
  Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
  partitionMetadata.put(new Partition(0), sspMetadata);
  partitionMetadata.put(new Partition(1), sspMetadata);
  SystemStreamMetadata systemStreamMetadata = new SystemStreamMetadata(STREAM_NAME, partitionMetadata);
  StreamMetadataCache mockStreamMetadataCache = mock(StreamMetadataCache.class);
  when(mockStreamMetadataCache.getStreamMetadata(
      JavaConverters.asScalaSetConverter(new HashSet<SystemStream>(changelogSystemStreams.values())).asScala().toSet(),
      false))
      .thenReturn(new scala.collection.immutable.Map.Map1(new SystemStream(SYSTEM_NAME, STREAM_NAME), systemStreamMetadata));
  CheckpointManager checkpointManager = mock(CheckpointManager.class);
  when(checkpointManager.readLastCheckpoint(any(TaskName.class))).thenReturn(new CheckpointV1(new HashMap<>()));
  SSPMetadataCache mockSSPMetadataCache = mock(SSPMetadataCache.class);
  when(mockSSPMetadataCache.getMetadata(any(SystemStreamPartition.class)))
      .thenReturn(new SystemStreamMetadata.SystemStreamPartitionMetadata("0", "10", "11"));
  ContainerContext mockContainerContext = mock(ContainerContext.class);
  ContainerModel mockContainerModel = new ContainerModel("samza-container-test", tasks);
  when(mockContainerContext.getContainerModel()).thenReturn(mockContainerModel);
  // Reset the expected number of sysConsumer create, start and stop calls, and store.restore() calls
  this.systemConsumerCreationCount = 0;
  this.systemConsumerStartCount = 0;
  this.systemConsumerStopCount = 0;
  this.storeRestoreCallCount = 0;
  StateBackendFactory backendFactory = mock(StateBackendFactory.class);
  TaskRestoreManager restoreManager = mock(TaskRestoreManager.class);
  ArgumentCaptor<ExecutorService> restoreExecutorCaptor = ArgumentCaptor.forClass(ExecutorService.class);
  when(backendFactory.getRestoreManager(any(), any(), any(), restoreExecutorCaptor.capture(), any(), any(), any(),
      any(), any(), any(), any()))
      .thenReturn(restoreManager);
  doAnswer(invocation -> {
    storeRestoreCallCount++;
    return CompletableFuture.completedFuture(null);
  }).when(restoreManager).restore();
  // Create the container storage manager
  this.containerStorageManager =
      new ContainerStorageManager(checkpointManager, mockContainerModel, mockStreamMetadataCache, mockSystemAdmins,
          changelogSystemStreams, new HashMap<>(), storageEngineFactories, systemFactories, serdes, config,
          taskInstanceMetrics, samzaContainerMetrics, mock(JobContext.class), mockContainerContext,
          ImmutableMap.of(StorageConfig.KAFKA_STATE_BACKEND_FACTORY, backendFactory), mock(Map.class),
          DEFAULT_LOGGED_STORE_BASE_DIR, DEFAULT_STORE_BASE_DIR, null, new SystemClock());
}
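setUp registers an ArgumentCaptor for the ExecutorService passed to StateBackendFactory.getRestoreManager but never inspects it; it uses captor.capture() inside the when(...) stubbing, which records the argument when the stubbed method is eventually invoked. The more common pattern is to capture during verification and then assert on getValue(). A minimal, generic sketch of that pattern, with a hypothetical Worker collaborator standing in for StateBackendFactory:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.ArgumentCaptor;

public class ArgumentCaptorSketchTest {
  /** Hypothetical collaborator that receives an executor, standing in for StateBackendFactory. */
  interface Worker {
    void runOn(ExecutorService executor);
  }

  @Test
  public void capturesTheExecutorPassedToTheCollaborator() {
    Worker worker = mock(Worker.class);
    ExecutorService restoreExecutor = Executors.newFixedThreadPool(2);

    // Code under test would normally make this call.
    worker.runOn(restoreExecutor);

    // Capture the argument the code under test handed to the mock, then assert on it.
    ArgumentCaptor<ExecutorService> captor = ArgumentCaptor.forClass(ExecutorService.class);
    verify(worker).runOn(captor.capture());
    Assert.assertSame(restoreExecutor, captor.getValue());

    restoreExecutor.shutdown();
  }
}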