Search in sources:

Example 6 with StreamMetadataCache

use of org.apache.samza.system.StreamMetadataCache in project samza by apache.

the class StorageRecovery method getContainerStorageManagers.

/**
 * Builds one {@link ContainerStorageManager} per container model found in
 * {@code containers} and stores each of them in {@code containerStorageManagers},
 * keyed by the container id.
 */
@SuppressWarnings("rawtypes")
private void getContainerStorageManagers() {
    // Instantiate every configured restore factory reflectively, keyed by its class name.
    Set<String> restoreFactoryClassNames = new StorageConfig(jobConfig).getRestoreFactories();
    Map<String, StateBackendFactory> backendFactoriesByClassName = restoreFactoryClassNames.stream()
            .collect(Collectors.toMap(
                    className -> className,
                    className -> ReflectionUtil.getObj(className, StateBackendFactory.class)));

    Clock metadataCacheClock = SystemClock.instance();
    StreamMetadataCache metadataCache = new StreamMetadataCache(systemAdmins, 5000, metadataCacheClock);
    // don't worry about prefetching for this; looks like the tool doesn't flush to offset files anyways
    Map<String, SystemFactory> systemFactoriesByName = new SystemConfig(jobConfig).getSystemFactories();
    // A missing checkpoint manager is passed on as null.
    CheckpointManager checkpoints = new TaskConfig(jobConfig)
            .getCheckpointManager(new MetricsRegistryMap())
            .orElse(null);

    for (ContainerModel model : containers.values()) {
        ContainerContext context = new ContainerContextImpl(model, new MetricsRegistryMap());
        // Both store base directory arguments receive the same storeBaseDir.
        ContainerStorageManager manager = new ContainerStorageManager(
                checkpoints,
                model,
                metadataCache,
                systemAdmins,
                changeLogSystemStreams,
                new HashMap<>(),
                storageEngineFactories,
                systemFactoriesByName,
                this.getSerdes(),
                jobConfig,
                new HashMap<>(),
                new SamzaContainerMetrics(model.getId(), new MetricsRegistryMap(), ""),
                JobContextImpl.fromConfigWithDefaults(jobConfig, jobModel),
                context,
                backendFactoriesByClassName,
                new HashMap<>(),
                storeBaseDir,
                storeBaseDir,
                null,
                new SystemClock());
        this.containerStorageManagers.put(model.getId(), manager);
    }
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) CoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.CoordinatorStreamStore) JobContextImpl(org.apache.samza.context.JobContextImpl) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) Serde(org.apache.samza.serializers.Serde) ContainerContextImpl(org.apache.samza.context.ContainerContextImpl) JobModelManager(org.apache.samza.coordinator.JobModelManager) SerdeFactory(org.apache.samza.serializers.SerdeFactory) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SamzaContainerMetrics(org.apache.samza.container.SamzaContainerMetrics) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) SystemConfig(org.apache.samza.config.SystemConfig) StreamUtil(org.apache.samza.util.StreamUtil) JobModel(org.apache.samza.job.model.JobModel) StorageConfig(org.apache.samza.config.StorageConfig) Logger(org.slf4j.Logger) SerializerConfig(org.apache.samza.config.SerializerConfig) TaskConfig(org.apache.samza.config.TaskConfig) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) SystemFactory(org.apache.samza.system.SystemFactory) Clock(org.apache.samza.util.Clock) Collectors(java.util.stream.Collectors) File(java.io.File) SamzaException(org.apache.samza.SamzaException) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) ReflectionUtil(org.apache.samza.util.ReflectionUtil) ContainerModel(org.apache.samza.job.model.ContainerModel) Optional(java.util.Optional) Config(org.apache.samza.config.Config) CoordinatorStreamUtil(org.apache.samza.util.CoordinatorStreamUtil) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) SystemAdmins(org.apache.samza.system.SystemAdmins) StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SystemFactory(org.apache.samza.system.SystemFactory) SystemConfig(org.apache.samza.config.SystemConfig) 
SystemClock(org.apache.samza.util.SystemClock) StorageConfig(org.apache.samza.config.StorageConfig) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) TaskConfig(org.apache.samza.config.TaskConfig) ContainerContextImpl(org.apache.samza.context.ContainerContextImpl) Clock(org.apache.samza.util.Clock) SystemClock(org.apache.samza.util.SystemClock) ContainerModel(org.apache.samza.job.model.ContainerModel) ContainerContext(org.apache.samza.context.ContainerContext) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) SamzaContainerMetrics(org.apache.samza.container.SamzaContainerMetrics)

Example 7 with StreamMetadataCache

use of org.apache.samza.system.StreamMetadataCache in project samza by apache.

the class TestContainerStorageManager method setUp.

/**
 * Method to create a containerStorageManager with mocked dependencies.
 *
 * Runs before each test: it rebuilds the task and metrics maps, registers two mocked tasks,
 * mocks the storage engine, system consumer/factory, system admins, stream metadata cache,
 * checkpoint manager and state backend factory, resets the call counters, and finally
 * constructs the {@link ContainerStorageManager} under test from those pieces.
 *
 * @throws InterruptedException declared by the signature; no statement visible in this method
 *         obviously throws it
 */
@Before
public void setUp() throws InterruptedException {
    taskRestoreMetricGauges = new HashMap<>();
    this.tasks = new HashMap<>();
    this.taskInstanceMetrics = new HashMap<>();
    // Add two mocked tasks
    addMockedTask("task 0", 0);
    addMockedTask("task 1", 1);
    // Mock container metrics
    samzaContainerMetrics = mock(SamzaContainerMetrics.class);
    when(samzaContainerMetrics.taskStoreRestorationMetrics()).thenReturn(taskRestoreMetricGauges);
    // Create a map of test changeLogSSPs
    Map<String, SystemStream> changelogSystemStreams = new HashMap<>();
    changelogSystemStreams.put(STORE_NAME, new SystemStream(SYSTEM_NAME, STREAM_NAME));
    // Create mocked storage engine factories
    Map<String, StorageEngineFactory<Object, Object>> storageEngineFactories = new HashMap<>();
    StorageEngineFactory mockStorageEngineFactory = (StorageEngineFactory<Object, Object>) mock(StorageEngineFactory.class);
    StorageEngine mockStorageEngine = mock(StorageEngine.class);
    // First stubbing of getStoreProperties(): logged AND persisted to disk.
    // NOTE(review): getStoreProperties() is stubbed again further down with logged-only
    // properties; presumably the later stubbing takes precedence - confirm which is intended.
    when(mockStorageEngine.getStoreProperties()).thenReturn(new StoreProperties.StorePropertiesBuilder().setLoggedStore(true).setPersistedToDisk(true).build());
    // The factory always hands out the single shared mocked storage engine
    doAnswer(invocation -> {
        return mockStorageEngine;
    }).when(mockStorageEngineFactory).getStorageEngine(anyString(), any(), any(), any(), any(), any(), any(), any(), any(), any());
    storageEngineFactories.put(STORE_NAME, mockStorageEngineFactory);
    // Add instrumentation to mocked storage engine, to record the number of store.restore() calls
    doAnswer(invocation -> {
        storeRestoreCallCount++;
        return CompletableFuture.completedFuture(null);
    }).when(mockStorageEngine).restore(any());
    // Second stubbing of getStoreProperties(): a fresh StoreProperties with only loggedStore set
    // (setPersistedToDisk is NOT called here, unlike the first stubbing above).
    doAnswer(invocation -> {
        return new StoreProperties.StorePropertiesBuilder().setLoggedStore(true).build();
    }).when(mockStorageEngine).getStoreProperties();
    // Mock and setup sysconsumers; start() and stop() calls are counted
    SystemConsumer mockSystemConsumer = mock(SystemConsumer.class);
    doAnswer(invocation -> {
        systemConsumerStartCount++;
        return null;
    }).when(mockSystemConsumer).start();
    doAnswer(invocation -> {
        systemConsumerStopCount++;
        return null;
    }).when(mockSystemConsumer).stop();
    // Create mocked system factories
    Map<String, SystemFactory> systemFactories = new HashMap<>();
    // Count the number of sysConsumers created
    SystemFactory mockSystemFactory = mock(SystemFactory.class);
    doAnswer(invocation -> {
        this.systemConsumerCreationCount++;
        return mockSystemConsumer;
    }).when(mockSystemFactory).getConsumer(anyString(), any(), any());
    systemFactories.put(SYSTEM_NAME, mockSystemFactory);
    // Create mocked configs for specifying serdes
    Map<String, String> configMap = new HashMap<>();
    configMap.put("stores." + STORE_NAME + ".key.serde", "stringserde");
    configMap.put("stores." + STORE_NAME + ".msg.serde", "stringserde");
    configMap.put("stores." + STORE_NAME + ".factory", mockStorageEngineFactory.getClass().getName());
    configMap.put("stores." + STORE_NAME + ".changelog", SYSTEM_NAME + "." + STREAM_NAME);
    configMap.put("serializers.registry.stringserde.class", StringSerdeFactory.class.getName());
    configMap.put(TaskConfig.TRANSACTIONAL_STATE_RETAIN_EXISTING_STATE, "true");
    Config config = new MapConfig(configMap);
    // The serde registered under "stringserde" is itself a mock
    Map<String, Serde<Object>> serdes = new HashMap<>();
    serdes.put("stringserde", mock(Serde.class));
    // Create mocked system admins; validateStream() only prints its arguments
    SystemAdmin mockSystemAdmin = mock(SystemAdmin.class);
    doAnswer(new Answer<Void>() {

        public Void answer(InvocationOnMock invocation) {
            Object[] args = invocation.getArguments();
            System.out.println("called with arguments: " + Arrays.toString(args));
            return null;
        }
    }).when(mockSystemAdmin).validateStream(any());
    SystemAdmins mockSystemAdmins = mock(SystemAdmins.class);
    // NOTE(review): the admin is stubbed for the literal "kafka" rather than SYSTEM_NAME;
    // presumably the two are equal - confirm against the constant's definition.
    when(mockSystemAdmins.getSystemAdmin("kafka")).thenReturn(mockSystemAdmin);
    // Create a mocked mockStreamMetadataCache that serves the same partition metadata
    // for partitions 0 and 1 of the changelog stream
    SystemStreamMetadata.SystemStreamPartitionMetadata sspMetadata = new SystemStreamMetadata.SystemStreamPartitionMetadata("0", "50", "51");
    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
    partitionMetadata.put(new Partition(0), sspMetadata);
    partitionMetadata.put(new Partition(1), sspMetadata);
    SystemStreamMetadata systemStreamMetadata = new SystemStreamMetadata(STREAM_NAME, partitionMetadata);
    StreamMetadataCache mockStreamMetadataCache = mock(StreamMetadataCache.class);
    // The stub is keyed on the Scala set of changelog streams with the boolean argument false
    when(mockStreamMetadataCache.getStreamMetadata(JavaConverters.asScalaSetConverter(new HashSet<SystemStream>(changelogSystemStreams.values())).asScala().toSet(), false)).thenReturn(new scala.collection.immutable.Map.Map1(new SystemStream(SYSTEM_NAME, STREAM_NAME), systemStreamMetadata));
    // Every task's last checkpoint is a CheckpointV1 built from an empty map
    CheckpointManager checkpointManager = mock(CheckpointManager.class);
    when(checkpointManager.readLastCheckpoint(any(TaskName.class))).thenReturn(new CheckpointV1(new HashMap<>()));
    // NOTE(review): mockSSPMetadataCache is stubbed here but is not passed to the
    // ContainerStorageManager constructor below - confirm whether it is still needed.
    SSPMetadataCache mockSSPMetadataCache = mock(SSPMetadataCache.class);
    when(mockSSPMetadataCache.getMetadata(any(SystemStreamPartition.class))).thenReturn(new SystemStreamMetadata.SystemStreamPartitionMetadata("0", "10", "11"));
    // The container model is a real object wrapping the mocked tasks; only the context is a mock
    ContainerContext mockContainerContext = mock(ContainerContext.class);
    ContainerModel mockContainerModel = new ContainerModel("samza-container-test", tasks);
    when(mockContainerContext.getContainerModel()).thenReturn(mockContainerModel);
    // Reset the expected number of sysConsumer create, start and stop calls, and store.restore() calls
    this.systemConsumerCreationCount = 0;
    this.systemConsumerStartCount = 0;
    this.systemConsumerStopCount = 0;
    this.storeRestoreCallCount = 0;
    // Mock the state backend factory so it returns a mocked restore manager; the executor
    // service argument passed to getRestoreManager is captured by restoreExecutorCaptor
    StateBackendFactory backendFactory = mock(StateBackendFactory.class);
    TaskRestoreManager restoreManager = mock(TaskRestoreManager.class);
    ArgumentCaptor<ExecutorService> restoreExecutorCaptor = ArgumentCaptor.forClass(ExecutorService.class);
    when(backendFactory.getRestoreManager(any(), any(), any(), restoreExecutorCaptor.capture(), any(), any(), any(), any(), any(), any(), any())).thenReturn(restoreManager);
    // restoreManager.restore() bumps the same storeRestoreCallCount counter as
    // mockStorageEngine.restore(...) above
    doAnswer(invocation -> {
        storeRestoreCallCount++;
        return CompletableFuture.completedFuture(null);
    }).when(restoreManager).restore();
    // Create the container storage manager
    this.containerStorageManager = new ContainerStorageManager(checkpointManager, mockContainerModel, mockStreamMetadataCache, mockSystemAdmins, changelogSystemStreams, new HashMap<>(), storageEngineFactories, systemFactories, serdes, config, taskInstanceMetrics, samzaContainerMetrics, mock(JobContext.class), mockContainerContext, ImmutableMap.of(StorageConfig.KAFKA_STATE_BACKEND_FACTORY, backendFactory), mock(Map.class), DEFAULT_LOGGED_STORE_BASE_DIR, DEFAULT_STORE_BASE_DIR, null, new SystemClock());
}
Also used : Serde(org.apache.samza.serializers.Serde) StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SystemConsumer(org.apache.samza.system.SystemConsumer) SystemFactory(org.apache.samza.system.SystemFactory) StringSerdeFactory(org.apache.samza.serializers.StringSerdeFactory) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) StorageConfig(org.apache.samza.config.StorageConfig) Config(org.apache.samza.config.Config) TaskConfig(org.apache.samza.config.TaskConfig) ContainerModel(org.apache.samza.job.model.ContainerModel) ContainerContext(org.apache.samza.context.ContainerContext) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) MapConfig(org.apache.samza.config.MapConfig) SystemAdmins(org.apache.samza.system.SystemAdmins) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) SystemClock(org.apache.samza.util.SystemClock) SystemStream(org.apache.samza.system.SystemStream) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) TaskName(org.apache.samza.container.TaskName) InvocationOnMock(org.mockito.invocation.InvocationOnMock) SSPMetadataCache(org.apache.samza.system.SSPMetadataCache) ExecutorService(java.util.concurrent.ExecutorService) SystemAdmin(org.apache.samza.system.SystemAdmin) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) SamzaContainerMetrics(org.apache.samza.container.SamzaContainerMetrics) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Before(org.junit.Before)

Example 8 with StreamMetadataCache

use of org.apache.samza.system.StreamMetadataCache in project samza by apache.

the class TestInputRegexMonitor method setUp.

/**
 * Creates a fresh {@code StreamRegexMonitor} before each test.
 *
 * The monitor is built with an empty initial input set and a single regex ({@code test-.*})
 * registered under {@code systemName}. Its callback counts down {@code callbackCount},
 * accumulates the newly reported streams in {@code inputStreamsDiscovered}, and asserts that
 * exactly one stream, {@code sampleStream}, has been discovered.
 */
@Before
public void setUp() {
    inputStreamsDiscovered = new HashSet<>();
    Map<String, Pattern> patternMap = new HashMap<>();
    patternMap.put(systemName, Pattern.compile("test-.*"));
    StreamMetadataCache mockStreamMetadataCache = new MockStreamMetadataCache(null, 1, null);
    MetricsRegistry metrics = Mockito.mock(MetricsRegistry.class);
    this.callbackCount = new CountDownLatch(expectedNumberOfCallbacks);
    // Creating a streamRegexMonitor with empty-input set and test-.* regex input
    this.streamRegexMonitor = new StreamRegexMonitor(new HashSet<>(), patternMap, mockStreamMetadataCache, metrics, inputRegexMs, new StreamRegexMonitor.Callback() {

        @Override
        public void onInputStreamsChanged(Set<SystemStream> initialInputSet, Set<SystemStream> newInputStreams, Map<String, Pattern> regexesMonitored) {
            callbackCount.countDown();
            inputStreamsDiscovered.addAll(newInputStreams);
            // Exactly one stream - the expected sampleStream - must have been discovered.
            // assertEquals (rather than assertTrue(size == 1)) reports the actual size on failure.
            Assert.assertEquals(1, inputStreamsDiscovered.size());
            Assert.assertTrue(inputStreamsDiscovered.contains(sampleStream));
        }
    });
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) Pattern(java.util.regex.Pattern) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) CountDownLatch(java.util.concurrent.CountDownLatch) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet) Before(org.junit.Before)

Example 9 with StreamMetadataCache

use of org.apache.samza.system.StreamMetadataCache in project samza by apache.

the class TestControlMessageSender method testSend.

/**
 * Verifies {@code ControlMessageSender.send} against a stream whose mocked metadata reports
 * four partitions: every envelope handed to the collector must target {@code systemStream},
 * and all envelopes together must use exactly one distinct partition key.
 */
@Test
public void testSend() {
    // Mocked stream metadata exposing partitions 0..3
    final int partitionCount = 4;
    SystemStreamMetadata metadata = mock(SystemStreamMetadata.class);
    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
    for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
        partitionMetadata.put(new Partition(partitionId), mock(SystemStreamMetadata.SystemStreamPartitionMetadata.class));
    }
    when(metadata.getSystemStreamPartitionMetadata()).thenReturn(partitionMetadata);
    StreamMetadataCache metadataCache = mock(StreamMetadataCache.class);
    when(metadataCache.getSystemStreamMetadata(anyObject(), anyBoolean())).thenReturn(metadata);
    SystemStream systemStream = new SystemStream("test-system", "test-stream");
    // Collects the distinct partition keys of every envelope sent through the collector
    Set<Integer> partitions = new HashSet<>();
    MessageCollector collector = mock(MessageCollector.class);
    doAnswer(invocation -> {
        OutgoingMessageEnvelope envelope = (OutgoingMessageEnvelope) invocation.getArguments()[0];
        partitions.add((Integer) envelope.getPartitionKey());
        // JUnit order is (expected, actual) so failure messages read correctly
        assertEquals(systemStream, envelope.getSystemStream());
        return null;
    }).when(collector).send(any());
    ControlMessageSender sender = new ControlMessageSender(metadataCache);
    WatermarkMessage watermark = new WatermarkMessage(System.currentTimeMillis(), "task 0");
    sender.send(watermark, systemStream, collector);
    // send() must have used a single partition key
    assertEquals(1, partitions.size());
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MessageCollector(org.apache.samza.task.MessageCollector) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 10 with StreamMetadataCache

use of org.apache.samza.system.StreamMetadataCache in project samza by apache.

the class TestControlMessageSender method testBroadcast.

/**
 * Verifies {@code ControlMessageSender.broadcastToOtherPartitions} against a stream whose
 * mocked metadata reports four partitions: when broadcasting from the SSP on partition 0,
 * every envelope must target {@code systemStream}, and the envelopes together must use three
 * distinct partition keys (presumably every partition except the source one).
 */
@Test
public void testBroadcast() {
    // Mocked stream metadata exposing partitions 0..3
    final int partitionCount = 4;
    SystemStreamMetadata metadata = mock(SystemStreamMetadata.class);
    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
    for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
        partitionMetadata.put(new Partition(partitionId), mock(SystemStreamMetadata.SystemStreamPartitionMetadata.class));
    }
    when(metadata.getSystemStreamPartitionMetadata()).thenReturn(partitionMetadata);
    StreamMetadataCache metadataCache = mock(StreamMetadataCache.class);
    when(metadataCache.getSystemStreamMetadata(anyObject(), anyBoolean())).thenReturn(metadata);
    SystemStream systemStream = new SystemStream("test-system", "test-stream");
    // Collects the distinct partition keys of every envelope sent through the collector
    Set<Integer> partitions = new HashSet<>();
    MessageCollector collector = mock(MessageCollector.class);
    doAnswer(invocation -> {
        OutgoingMessageEnvelope envelope = (OutgoingMessageEnvelope) invocation.getArguments()[0];
        partitions.add((Integer) envelope.getPartitionKey());
        // JUnit order is (expected, actual) so failure messages read correctly
        assertEquals(systemStream, envelope.getSystemStream());
        return null;
    }).when(collector).send(any());
    ControlMessageSender sender = new ControlMessageSender(metadataCache);
    WatermarkMessage watermark = new WatermarkMessage(System.currentTimeMillis(), "task 0");
    SystemStreamPartition ssp = new SystemStreamPartition(systemStream, new Partition(0));
    sender.broadcastToOtherPartitions(watermark, ssp, collector);
    // One partition key per partition other than the source: 4 - 1 = 3
    assertEquals(partitionCount - 1, partitions.size());
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MessageCollector(org.apache.samza.task.MessageCollector) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Aggregations

StreamMetadataCache (org.apache.samza.system.StreamMetadataCache)13 HashMap (java.util.HashMap)7 HashSet (java.util.HashSet)5 SystemAdmins (org.apache.samza.system.SystemAdmins)5 SystemStream (org.apache.samza.system.SystemStream)5 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)5 Map (java.util.Map)4 Partition (org.apache.samza.Partition)4 Config (org.apache.samza.config.Config)4 StorageConfig (org.apache.samza.config.StorageConfig)4 ContainerModel (org.apache.samza.job.model.ContainerModel)4 SystemStreamMetadata (org.apache.samza.system.SystemStreamMetadata)4 Set (java.util.Set)3 JobConfig (org.apache.samza.config.JobConfig)3 MapConfig (org.apache.samza.config.MapConfig)3 TaskConfig (org.apache.samza.config.TaskConfig)3 TaskName (org.apache.samza.container.TaskName)3 TaskModel (org.apache.samza.job.model.TaskModel)3 SystemClock (org.apache.samza.util.SystemClock)3 File (java.io.File)2