Search in sources :

Example 6 with SystemConsumer

use of org.apache.samza.system.SystemConsumer in project samza by apache.

the class TestRunner method consumeStream.

/**
 * Gets the contents of the output stream represented by {@code outputDescriptor} after {@link TestRunner#run(Duration)}
 * has completed.
 *
 * <p>Every partition listed in the stream metadata is registered with an in-memory consumer at offset {@code "0"}
 * and polled repeatedly. A partition counts as fully read once the number of messages collected for it equals the
 * partition's upcoming offset as reported by the metadata fetched at the start of the call.
 *
 * @param outputDescriptor describes the stream to be consumed
 * @param timeout maximum amount of time to keep polling the stream
 * @param <StreamMessageType> type of message
 *
 * @return a map whose key is {@code partitionId} and value is messages in partition
 * @throws SamzaException if the calling thread is interrupted while polling
 * @throws IllegalStateException if at least one partition was not fully read before {@code timeout} elapsed
 */
public static <StreamMessageType> Map<Integer, List<StreamMessageType>> consumeStream(InMemoryOutputDescriptor outputDescriptor, Duration timeout) throws SamzaException {
    Preconditions.checkNotNull(outputDescriptor);
    String streamId = outputDescriptor.getStreamId();
    String systemName = outputDescriptor.getSystemName();
    Set<SystemStreamPartition> ssps = new HashSet<>();
    Set<String> streamIds = new HashSet<>();
    streamIds.add(streamId);
    SystemFactory factory = new InMemorySystemFactory();
    Config config = new MapConfig(outputDescriptor.toConfig(), outputDescriptor.getSystemDescriptor().toConfig());
    Map<String, SystemStreamMetadata> metadata = factory.getAdmin(systemName, config).getSystemStreamMetadata(streamIds);
    SystemConsumer consumer = factory.getConsumer(systemName, config, null);
    String name = (String) outputDescriptor.getPhysicalName().orElse(streamId);
    metadata.get(name).getSystemStreamPartitionMetadata().keySet().forEach(partition -> {
        SystemStreamPartition temp = new SystemStreamPartition(systemName, streamId, partition);
        ssps.add(temp);
        consumer.register(temp, "0");
    });
    long deadline = System.currentTimeMillis() + timeout.toMillis();
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> output = new HashMap<>();
    Set<SystemStreamPartition> didNotReachEndOfStream = new HashSet<>(ssps);
    while (System.currentTimeMillis() < deadline) {
        Map<SystemStreamPartition, List<IncomingMessageEnvelope>> currentState;
        try {
            currentState = consumer.poll(ssps, 10);
        } catch (InterruptedException e) {
            // Restore the interrupt status for callers further up the stack and keep the original cause.
            Thread.currentThread().interrupt();
            throw new SamzaException("Interrupted while consuming stream: " + streamId, e);
        }
        for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> entry : currentState.entrySet()) {
            SystemStreamPartition ssp = entry.getKey();
            List<IncomingMessageEnvelope> collected = output.computeIfAbsent(ssp, k -> new LinkedList<IncomingMessageEnvelope>());
            List<IncomingMessageEnvelope> currentBuffer = entry.getValue();
            // NOTE(review): this lookup is keyed by the stream id, while the partition listing above is keyed by
            // the physical name (falling back to the stream id) - confirm both keys are always present.
            int totalMessagesToFetch = Integer.parseInt(metadata.get(streamId).getSystemStreamPartitionMetadata().get(ssp.getPartition()).getUpcomingOffset());
            if (collected.size() + currentBuffer.size() == totalMessagesToFetch) {
                // Everything expected for this partition has arrived, so stop polling it.
                didNotReachEndOfStream.remove(ssp);
                ssps.remove(ssp);
            }
            collected.addAll(currentBuffer);
        }
        if (didNotReachEndOfStream.isEmpty()) {
            break;
        }
    }
    if (!didNotReachEndOfStream.isEmpty()) {
        throw new IllegalStateException("Could not poll for all system stream partitions");
    }
    // Re-key by partition id and unwrap each envelope to its message payload.
    return output.entrySet().stream().collect(Collectors.toMap(
        entry -> entry.getKey().getPartition().getPartitionId(),
        entry -> entry.getValue().stream().map(e -> (StreamMessageType) e.getMessage()).collect(Collectors.toList())));
}
Also used : InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) LoggerFactory(org.slf4j.LoggerFactory) InMemorySystemProducer(org.apache.samza.system.inmemory.InMemorySystemProducer) SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) FileUtil(org.apache.samza.util.FileUtil) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) SystemConsumer(org.apache.samza.system.SystemConsumer) Duration(java.time.Duration) Map(java.util.Map) StreamTask(org.apache.samza.task.StreamTask) SamzaApplication(org.apache.samza.application.SamzaApplication) InMemoryMetadataStoreFactory(org.apache.samza.metadatastore.InMemoryMetadataStoreFactory) ExternalContext(org.apache.samza.context.ExternalContext) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) AsyncStreamTask(org.apache.samza.task.AsyncStreamTask) StreamDescriptor(org.apache.samza.system.descriptors.StreamDescriptor) Set(java.util.Set) InMemorySystemConfig(org.apache.samza.config.InMemorySystemConfig) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) Collectors(java.util.stream.Collectors) List(java.util.List) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) ApplicationStatus(org.apache.samza.job.ApplicationStatus) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) 
SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) StreamConfig(org.apache.samza.config.StreamConfig) HashSet(java.util.HashSet) SystemStream(org.apache.samza.system.SystemStream) ApplicationConfig(org.apache.samza.config.ApplicationConfig) LinkedList(java.util.LinkedList) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) Logger(org.slf4j.Logger) TaskConfig(org.apache.samza.config.TaskConfig) JobPlanner(org.apache.samza.execution.JobPlanner) SystemFactory(org.apache.samza.system.SystemFactory) StreamSpec(org.apache.samza.system.StreamSpec) File(java.io.File) SamzaException(org.apache.samza.SamzaException) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Preconditions(com.google.common.base.Preconditions) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) SystemConsumer(org.apache.samza.system.SystemConsumer) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) SystemFactory(org.apache.samza.system.SystemFactory) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) InMemorySystemConfig(org.apache.samza.config.InMemorySystemConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) StreamConfig(org.apache.samza.config.StreamConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) TaskConfig(org.apache.samza.config.TaskConfig) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) SamzaException(org.apache.samza.SamzaException) List(java.util.List) LinkedList(java.util.LinkedList) MapConfig(org.apache.samza.config.MapConfig) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) HashSet(java.util.HashSet) 
SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) Map(java.util.Map) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 7 with SystemConsumer

use of org.apache.samza.system.SystemConsumer in project samza by apache.

the class TestAsyncRunLoop method testEndOfStreamOffsetManagement.

// TODO: Add assertions.
//@Test
public void testEndOfStreamOffsetManagement() throws Exception {
    // Two partitions on the same system, each handled by its own task.
    SystemStreamPartition firstSsp = new SystemStreamPartition("system1", "stream1", new Partition(1));
    SystemStreamPartition secondSsp = new SystemStreamPartition("system1", "stream2", new Partition(2));

    // The mocked consumer is stubbed to return two regular envelopes followed by an
    // end-of-stream envelope, all for the second partition.
    List<IncomingMessageEnvelope> polledEnvelopes = new ArrayList<>();
    polledEnvelopes.add(new IncomingMessageEnvelope(secondSsp, "1", "key1", "message1"));
    polledEnvelopes.add(new IncomingMessageEnvelope(secondSsp, "2", "key1", "message1"));
    polledEnvelopes.add(IncomingMessageEnvelope.buildEndOfStreamEnvelope(secondSsp));
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> pollResult = new HashMap<>();
    pollResult.put(secondSsp, polledEnvelopes);
    SystemConsumer consumerMock = mock(SystemConsumer.class);
    when(consumerMock.poll(anyObject(), anyLong())).thenReturn(pollResult);
    HashMap<String, SystemConsumer> consumersBySystem = new HashMap<>();
    consumersBySystem.put("system1", consumerMock);
    SystemConsumers consumers = TestSystemConsumers.getSystemConsumers(consumersBySystem);

    TaskName firstTaskName = new TaskName("task1");
    TaskName secondTaskName = new TaskName("task2");
    Set<TaskName> taskNames = new HashSet<>();
    taskNames.add(firstTaskName);
    taskNames.add(secondTaskName);

    // Stub the offsets the task instances will ask the offset manager for.
    OffsetManager offsetManager = mock(OffsetManager.class);
    when(offsetManager.getLastProcessedOffset(firstTaskName, firstSsp)).thenReturn(Option.apply("3"));
    when(offsetManager.getLastProcessedOffset(secondTaskName, secondSsp)).thenReturn(Option.apply("0"));
    when(offsetManager.getStartingOffset(firstTaskName, firstSsp)).thenReturn(Option.apply(IncomingMessageEnvelope.END_OF_STREAM_OFFSET));
    when(offsetManager.getStartingOffset(secondTaskName, secondSsp)).thenReturn(Option.apply("1"));

    // Explicitly configured to disable commits inside process or window calls and to invoke
    // commit from end of stream instead.
    TestTask firstTask = new TestTask(true, false, false, null);
    TestTask secondTask = new TestTask(true, false, false, null);
    TaskInstance firstInstance = createTaskInstance(firstTask, firstTaskName, firstSsp, offsetManager, consumers);
    TaskInstance secondInstance = createTaskInstance(secondTask, secondTaskName, secondSsp, offsetManager, consumers);
    Map<TaskName, TaskInstance> tasks = new HashMap<>();
    tasks.put(firstTaskName, firstInstance);
    tasks.put(secondTaskName, secondInstance);

    firstInstance.registerConsumers();
    secondInstance.registerConsumers();
    consumers.start();

    // Run the loop with at most one message in flight per task.
    AsyncRunLoop runLoop = new AsyncRunLoop(tasks, executor, consumers, 1, windowMs, commitMs, callbackTimeoutMs, maxThrottlingDelayMs, containerMetrics, () -> 0L, false);
    runLoop.run();
}
Also used : SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) SystemConsumer(org.apache.samza.system.SystemConsumer) TaskInstance(org.apache.samza.container.TaskInstance) SystemConsumers(org.apache.samza.system.SystemConsumers) TestSystemConsumers(org.apache.samza.system.TestSystemConsumers) HashMap(java.util.HashMap) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) OffsetManager(org.apache.samza.checkpoint.OffsetManager) ArrayList(java.util.ArrayList) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet)

Example 8 with SystemConsumer

use of org.apache.samza.system.SystemConsumer in project samza by apache.

the class StorageRecovery method getTaskStorageManagers.

/**
 * Builds one {@code TaskStorageManager} for every task of every container and appends each of
 * them to {@code taskStorageManagers}.
 *
 * <p>A storage engine is only created for stores that have an entry in
 * {@code changeLogSystemStreams}; stores without a changelog are skipped.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void getTaskStorageManagers() {
    StreamMetadataCache metadataCache = new StreamMetadataCache(Util.javaMapAsScalaMap(systemAdmins), 5000, SystemClock.instance());
    for (ContainerModel container : containers.values()) {
        // NOTE(review): this map is created once per container and reused for each of its tasks;
        // confirm that javaMapAsScalaMap copies it rather than wrapping it.
        HashMap<String, StorageEngine> taskStores = new HashMap<String, StorageEngine>();
        SamzaContainerContext containerContext = new SamzaContainerContext(container.getProcessorId(), jobConfig, container.getTasks().keySet());
        for (TaskModel task : container.getTasks().values()) {
            HashMap<String, SystemConsumer> consumersForStores = getStoreConsumers();
            for (Entry<String, StorageEngineFactory<?, ?>> factoryEntry : storageEngineFactories.entrySet()) {
                String storeName = factoryEntry.getKey();
                if (!changeLogSystemStreams.containsKey(storeName)) {
                    // No changelog configured for this store, so no engine is created for it.
                    continue;
                }
                SystemStreamPartition changelogSsp = new SystemStreamPartition(changeLogSystemStreams.get(storeName), task.getChangelogPartition());
                File storeDir = TaskStorageManager.getStorePartitionDir(storeBaseDir, storeName, task.getTaskName());
                log.info("Got storage engine directory: " + storeDir);
                StorageEngine engine = factoryEntry.getValue().getStorageEngine(
                    storeName,
                    storeDir,
                    (Serde) new ByteSerde(),
                    (Serde) new ByteSerde(),
                    null,
                    new MetricsRegistryMap(),
                    changelogSsp,
                    containerContext);
                taskStores.put(storeName, engine);
            }
            taskStorageManagers.add(new TaskStorageManager(
                task.getTaskName(),
                Util.javaMapAsScalaMap(taskStores),
                Util.javaMapAsScalaMap(consumersForStores),
                Util.javaMapAsScalaMap(changeLogSystemStreams),
                maxPartitionNumber,
                metadataCache,
                storeBaseDir,
                storeBaseDir,
                task.getChangelogPartition(),
                Util.javaMapAsScalaMap(systemAdmins),
                new StorageConfig(jobConfig).getChangeLogDeleteRetentionsInMs(),
                new SystemClock()));
        }
    }
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SamzaContainerContext(org.apache.samza.container.SamzaContainerContext) SystemConsumer(org.apache.samza.system.SystemConsumer) SystemClock(org.apache.samza.util.SystemClock) HashMap(java.util.HashMap) StorageConfig(org.apache.samza.config.StorageConfig) JavaStorageConfig(org.apache.samza.config.JavaStorageConfig) ContainerModel(org.apache.samza.job.model.ContainerModel) ByteSerde(org.apache.samza.serializers.ByteSerde) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 9 with SystemConsumer

use of org.apache.samza.system.SystemConsumer in project samza by apache.

the class TransactionalStateTaskRestoreManager method registerStartingOffsets.

/**
 * Registers a starting offset with the changelog {@code SystemConsumer} of every store.
 *
 * <p>This happens in two passes. First, every store changelog SSP is registered at its upcoming
 * offset. Second, each store listed in {@code storeActions.storesToRestore} is registered again
 * at the offset it actually has to be restored from.
 */
@VisibleForTesting
static void registerStartingOffsets(TaskModel taskModel, StoreActions storeActions, Map<String, SystemStream> storeChangelogs, SystemAdmins systemAdmins, Map<String, SystemConsumer> storeConsumers, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets) {
    Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;

    // Pass 1: each changelog system consumer needs at least one registered SSP, otherwise start
    // will throw. The upcoming offset serves as a placeholder so that nothing is replayed by
    // default; it is overridden in pass 2 where required.
    for (Map.Entry<String, SystemStream> changelogEntry : storeChangelogs.entrySet()) {
        SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogEntry.getValue(), taskModel.getChangelogPartition());
        SystemConsumer consumer = storeConsumers.get(changelogEntry.getKey());
        String upcomingOffset = currentChangelogOffsets.get(changelogSSP).getUpcomingOffset();
        LOG.info("Temporarily registering upcoming offset: {} as the starting offest for changelog ssp: {}. " + "This might be overridden later for stores that need restoring.", upcomingOffset, changelogSSP);
        consumer.register(changelogSSP, upcomingOffset);
    }

    // Pass 2: register the real starting offset. The system consumer will ensure that the lower
    // of the two registered offsets is the one used.
    for (Map.Entry<String, RestoreOffsets> restoreEntry : storesToRestore.entrySet()) {
        String storeName = restoreEntry.getKey();
        RestoreOffsets restoreOffsets = restoreEntry.getValue();
        SystemStream changelog = storeChangelogs.get(storeName);
        SystemStreamPartition changelogSSP = new SystemStreamPartition(changelog, taskModel.getChangelogPartition());
        SystemAdmin systemAdmin = systemAdmins.getSystemAdmin(changelog.getSystem());
        validateRestoreOffsets(restoreOffsets, systemAdmin);
        SystemConsumer consumer = storeConsumers.get(storeName);
        String oldestOffset = currentChangelogOffsets.get(changelogSSP).getOldestOffset();

        // A restore that begins at the oldest offset (e.g. a full restore) starts there inclusively;
        // any other restore starts from the offset following the recorded starting offset.
        boolean startsAtOldest = systemAdmin.offsetComparator(restoreOffsets.startingOffset, oldestOffset) == 0;
        String startingOffset;
        if (!startsAtOldest) {
            Map<SystemStreamPartition, String> offsetMap = ImmutableMap.of(changelogSSP, restoreOffsets.startingOffset);
            startingOffset = systemAdmin.getOffsetsAfter(offsetMap).get(changelogSSP);
        } else {
            startingOffset = oldestOffset;
        }
        LOG.info("Registering starting offset: {} for changelog ssp: {}", startingOffset, changelogSSP);
        consumer.register(changelogSSP, startingOffset);
    }
}
Also used : SystemConsumer(org.apache.samza.system.SystemConsumer) SystemStream(org.apache.samza.system.SystemStream) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 10 with SystemConsumer

use of org.apache.samza.system.SystemConsumer in project samza by apache.

the class TransactionalStateTaskRestoreManager method restore.

/**
 * Restores every store listed in {@code storeActions.storesToRestore} from its changelog.
 *
 * <p>The work runs asynchronously on {@code restoreExecutor}. Stores are restored one after
 * another, each through a {@code ChangelogSSPIterator} bounded by the store's ending offset.
 *
 * @return a future that completes once all stores have been restored, or completes exceptionally
 *         if restoring a store fails or the restoring thread is interrupted
 */
@Override
public CompletableFuture<Void> restore() {
    return CompletableFuture.runAsync(() -> {
        Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;
        for (Map.Entry<String, RestoreOffsets> entry : storesToRestore.entrySet()) {
            String storeName = entry.getKey();
            String endOffset = entry.getValue().endingOffset;
            SystemStream systemStream = storeChangelogs.get(storeName);
            SystemAdmin systemAdmin = systemAdmins.getSystemAdmin(systemStream.getSystem());
            SystemConsumer systemConsumer = storeConsumers.get(storeName);
            SystemStreamPartition changelogSSP = new SystemStreamPartition(systemStream, taskModel.getChangelogPartition());
            ChangelogSSPIterator changelogSSPIterator = new ChangelogSSPIterator(systemConsumer, changelogSSP, endOffset, systemAdmin, true, currentChangelogOffsets.get(changelogSSP).getNewestOffset());
            StorageEngine taskStore = storeEngines.get(storeName);
            LOG.info("Restoring store: {} for task: {}", storeName, taskModel.getTaskName());
            try {
                taskStore.restore(changelogSSPIterator);
            } catch (InterruptedException e) {
                // Catching InterruptedException clears the thread's interrupt status; set it again
                // so the executor and any enclosing code can still observe the interruption.
                Thread.currentThread().interrupt();
                String msg = String.format("Interrupted while restoring store: %s for task: %s", storeName, taskModel.getTaskName().getTaskName());
                // wrap in unchecked exception to throw from lambda
                throw new SamzaException(msg, e);
            }
        }
    }, restoreExecutor);
}
Also used : SystemConsumer(org.apache.samza.system.SystemConsumer) SystemStream(org.apache.samza.system.SystemStream) ChangelogSSPIterator(org.apache.samza.system.ChangelogSSPIterator) SamzaException(org.apache.samza.SamzaException) SystemAdmin(org.apache.samza.system.SystemAdmin) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Aggregations

SystemConsumer (org.apache.samza.system.SystemConsumer)18 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)17 SystemStream (org.apache.samza.system.SystemStream)12 HashMap (java.util.HashMap)11 SystemAdmin (org.apache.samza.system.SystemAdmin)10 Map (java.util.Map)9 List (java.util.List)8 Partition (org.apache.samza.Partition)8 Config (org.apache.samza.config.Config)8 IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope)8 MapConfig (org.apache.samza.config.MapConfig)7 Test (org.junit.Test)7 Set (java.util.Set)6 File (java.io.File)5 SamzaException (org.apache.samza.SamzaException)5 TaskConfig (org.apache.samza.config.TaskConfig)5 TaskName (org.apache.samza.container.TaskName)5 ImmutableMap (com.google.common.collect.ImmutableMap)4 ArrayList (java.util.ArrayList)4 TaskModel (org.apache.samza.job.model.TaskModel)4