use of org.apache.samza.system.SystemConsumer in project samza by apache.
the class TestRunner method consumeStream.
/**
 * Gets the contents of the output stream represented by {@code outputDescriptor} after {@link TestRunner#run(Duration)}
 * has completed.
 *
 * <p>Every partition of the stream is registered with an in-memory consumer starting at offset {@code "0"} and is
 * polled repeatedly until the number of messages collected for it equals the partition's upcoming offset, or until
 * {@code timeout} elapses.
 *
 * @param outputDescriptor describes the stream to be consumed
 * @param timeout timeout for consumption of stream in Ms
 * @param <StreamMessageType> type of message
 *
 * @return a map whose key is {@code partitionId} and value is messages in partition
 * @throws SamzaException if the calling thread is interrupted while polling
 * @throws IllegalStateException if at least one partition was not fully consumed before {@code timeout} elapsed
 */
public static <StreamMessageType> Map<Integer, List<StreamMessageType>> consumeStream(InMemoryOutputDescriptor outputDescriptor, Duration timeout) throws SamzaException {
    Preconditions.checkNotNull(outputDescriptor);
    String streamId = outputDescriptor.getStreamId();
    String systemName = outputDescriptor.getSystemName();
    Set<SystemStreamPartition> ssps = new HashSet<>();
    Set<String> streamIds = new HashSet<>();
    streamIds.add(streamId);
    SystemFactory factory = new InMemorySystemFactory();
    Config config = new MapConfig(outputDescriptor.toConfig(), outputDescriptor.getSystemDescriptor().toConfig());
    Map<String, SystemStreamMetadata> metadata = factory.getAdmin(systemName, config).getSystemStreamMetadata(streamIds);
    SystemConsumer consumer = factory.getConsumer(systemName, config, null);
    String name = (String) outputDescriptor.getPhysicalName().orElse(streamId);
    // Register every partition of the stream so that consumption starts from the first message.
    metadata.get(name).getSystemStreamPartitionMetadata().keySet().forEach(partition -> {
        SystemStreamPartition temp = new SystemStreamPartition(systemName, streamId, partition);
        ssps.add(temp);
        consumer.register(temp, "0");
    });

    long deadline = System.currentTimeMillis() + timeout.toMillis();
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> output = new HashMap<>();
    Set<SystemStreamPartition> didNotReachEndOfStream = new HashSet<>(ssps);
    while (System.currentTimeMillis() < deadline) {
        Map<SystemStreamPartition, List<IncomingMessageEnvelope>> currentState;
        try {
            currentState = consumer.poll(ssps, 10);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up the stack can still observe the interruption,
            // and keep the original exception as the cause instead of flattening it into the message.
            Thread.currentThread().interrupt();
            throw new SamzaException("Interrupted while consuming stream " + streamId, e);
        }
        for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> entry : currentState.entrySet()) {
            SystemStreamPartition ssp = entry.getKey();
            List<IncomingMessageEnvelope> collected = output.computeIfAbsent(ssp, k -> new LinkedList<IncomingMessageEnvelope>());
            List<IncomingMessageEnvelope> currentBuffer = entry.getValue();
            // The upcoming offset of the partition is used as the number of messages expected from it.
            int totalMessagesToFetch = Integer.parseInt(metadata.get(streamId).getSystemStreamPartitionMetadata().get(ssp.getPartition()).getUpcomingOffset());
            if (collected.size() + currentBuffer.size() == totalMessagesToFetch) {
                // Partition fully drained: stop tracking it and stop polling it.
                didNotReachEndOfStream.remove(ssp);
                ssps.remove(ssp);
            }
            collected.addAll(currentBuffer);
        }
        if (didNotReachEndOfStream.isEmpty()) {
            break;
        }
    }

    if (!didNotReachEndOfStream.isEmpty()) {
        throw new IllegalStateException("Could not poll for all system stream partitions");
    }
    return output.entrySet().stream().collect(Collectors.toMap(
        entry -> entry.getKey().getPartition().getPartitionId(),
        entry -> entry.getValue().stream().map(e -> (StreamMessageType) e.getMessage()).collect(Collectors.toList())));
}
use of org.apache.samza.system.SystemConsumer in project samza by apache.
the class TestAsyncRunLoop method testEndOfStreamOffsetManagement.
// TODO: Add assertions.
//@Test
public void testEndOfStreamOffsetManagement() throws Exception {
    // Both tasks are explicitly configured to disable commits inside process or window calls;
    // the commit is invoked from end of stream instead.
    TestTask firstTask = new TestTask(true, false, false, null);
    TestTask secondTask = new TestTask(true, false, false, null);

    SystemStreamPartition firstSsp = new SystemStreamPartition("system1", "stream1", new Partition(1));
    SystemStreamPartition secondSsp = new SystemStreamPartition("system1", "stream2", new Partition(2));

    // Two regular envelopes followed by an end-of-stream envelope, all for the second partition.
    List<IncomingMessageEnvelope> polledEnvelopes = new ArrayList<>();
    polledEnvelopes.add(new IncomingMessageEnvelope(secondSsp, "1", "key1", "message1"));
    polledEnvelopes.add(new IncomingMessageEnvelope(secondSsp, "2", "key1", "message1"));
    polledEnvelopes.add(IncomingMessageEnvelope.buildEndOfStreamEnvelope(secondSsp));
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> pollResult = new HashMap<>();
    pollResult.put(secondSsp, polledEnvelopes);

    // A mocked consumer that hands back the same poll result for any arguments.
    SystemConsumer stubConsumer = mock(SystemConsumer.class);
    when(stubConsumer.poll(anyObject(), anyLong())).thenReturn(pollResult);
    HashMap<String, SystemConsumer> consumersBySystem = new HashMap<>();
    consumersBySystem.put("system1", stubConsumer);
    SystemConsumers consumers = TestSystemConsumers.getSystemConsumers(consumersBySystem);

    TaskName firstTaskName = new TaskName("task1");
    TaskName secondTaskName = new TaskName("task2");
    Set<TaskName> taskNames = new HashSet<>();
    taskNames.add(firstTaskName);
    taskNames.add(secondTaskName);

    // Stub the offsets reported for each (task, partition) pair.
    OffsetManager offsetManager = mock(OffsetManager.class);
    when(offsetManager.getLastProcessedOffset(firstTaskName, firstSsp)).thenReturn(Option.apply("3"));
    when(offsetManager.getLastProcessedOffset(secondTaskName, secondSsp)).thenReturn(Option.apply("0"));
    when(offsetManager.getStartingOffset(firstTaskName, firstSsp)).thenReturn(Option.apply(IncomingMessageEnvelope.END_OF_STREAM_OFFSET));
    when(offsetManager.getStartingOffset(secondTaskName, secondSsp)).thenReturn(Option.apply("1"));

    TaskInstance firstInstance = createTaskInstance(firstTask, firstTaskName, firstSsp, offsetManager, consumers);
    TaskInstance secondInstance = createTaskInstance(secondTask, secondTaskName, secondSsp, offsetManager, consumers);
    Map<TaskName, TaskInstance> tasks = new HashMap<>();
    tasks.put(firstTaskName, firstInstance);
    tasks.put(secondTaskName, secondInstance);

    firstInstance.registerConsumers();
    secondInstance.registerConsumers();
    consumers.start();

    int inFlightLimit = 1;
    AsyncRunLoop runLoop = new AsyncRunLoop(
        tasks,
        executor,
        consumers,
        inFlightLimit,
        windowMs,
        commitMs,
        callbackTimeoutMs,
        maxThrottlingDelayMs,
        containerMetrics,
        () -> 0L,
        false);
    runLoop.run();
}
use of org.apache.samza.system.SystemConsumer in project samza by apache.
the class StorageRecovery method getTaskStorageManagers.
/**
 * Builds one TaskStorageManager per task of every container and appends each of them to
 * {@code taskStorageManagers}.
 *
 * A storage engine is only created for stores that have an entry in {@code changeLogSystemStreams}.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void getTaskStorageManagers() {
    StreamMetadataCache metadataCache =
        new StreamMetadataCache(Util.javaMapAsScalaMap(systemAdmins), 5000, SystemClock.instance());

    for (ContainerModel container : containers.values()) {
        // The engine map is created once per container, so it is shared by all tasks of that container.
        HashMap<String, StorageEngine> engines = new HashMap<>();
        SamzaContainerContext context =
            new SamzaContainerContext(container.getProcessorId(), jobConfig, container.getTasks().keySet());

        for (TaskModel task : container.getTasks().values()) {
            HashMap<String, SystemConsumer> consumers = getStoreConsumers();

            for (Entry<String, StorageEngineFactory<?, ?>> factoryEntry : storageEngineFactories.entrySet()) {
                String storeName = factoryEntry.getKey();
                if (!changeLogSystemStreams.containsKey(storeName)) {
                    // No changelog configured for this store; no engine is created for it.
                    continue;
                }

                SystemStreamPartition changelogPartition =
                    new SystemStreamPartition(changeLogSystemStreams.get(storeName), task.getChangelogPartition());
                File partitionDir = TaskStorageManager.getStorePartitionDir(storeBaseDir, storeName, task.getTaskName());
                log.info("Got storage engine directory: " + partitionDir);

                StorageEngine engine = factoryEntry.getValue().getStorageEngine(
                    storeName,
                    partitionDir,
                    (Serde) new ByteSerde(),
                    (Serde) new ByteSerde(),
                    null,
                    new MetricsRegistryMap(),
                    changelogPartition,
                    context);
                engines.put(storeName, engine);
            }

            TaskStorageManager manager = new TaskStorageManager(
                task.getTaskName(),
                Util.javaMapAsScalaMap(engines),
                Util.javaMapAsScalaMap(consumers),
                Util.javaMapAsScalaMap(changeLogSystemStreams),
                maxPartitionNumber,
                metadataCache,
                storeBaseDir,
                storeBaseDir,
                task.getChangelogPartition(),
                Util.javaMapAsScalaMap(systemAdmins),
                new StorageConfig(jobConfig).getChangeLogDeleteRetentionsInMs(),
                new SystemClock());
            taskStorageManagers.add(manager);
        }
    }
}
use of org.apache.samza.system.SystemConsumer in project samza by apache.
the class TransactionalStateTaskRestoreManager method registerStartingOffsets.
/**
 * Works out, for each store changelog SSP that has to be restored from, the offset at which restoration
 * begins, and registers that offset with the store's SystemConsumer.
 */
@VisibleForTesting
static void registerStartingOffsets(TaskModel taskModel, StoreActions storeActions, Map<String, SystemStream> storeChangelogs, SystemAdmins systemAdmins, Map<String, SystemConsumer> storeConsumers, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets) {
    Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;

    // Each changelog system consumer needs at least one registered SSP, otherwise start will throw.
    // So the upcoming offset is registered as a dummy default for every changelog and overridden below
    // where needed. Using the upcoming offset ensures that no messages are replayed by default.
    for (Map.Entry<String, SystemStream> changelogEntry : storeChangelogs.entrySet()) {
        SystemStreamPartition ssp =
            new SystemStreamPartition(changelogEntry.getValue(), taskModel.getChangelogPartition());
        SystemConsumer consumer = storeConsumers.get(changelogEntry.getKey());
        String upcomingOffset = currentChangelogOffsets.get(ssp).getUpcomingOffset();
        LOG.info("Temporarily registering upcoming offset: {} as the starting offest for changelog ssp: {}. " + "This might be overridden later for stores that need restoring.",
            upcomingOffset, ssp);
        consumer.register(ssp, upcomingOffset);
    }

    // Now register the real starting offset for stores that need restoring. The system consumer will
    // ensure that the lower of the two registered offsets is used as the starting offset.
    for (Map.Entry<String, RestoreOffsets> restoreEntry : storesToRestore.entrySet()) {
        String storeName = restoreEntry.getKey();
        RestoreOffsets offsets = restoreEntry.getValue();

        SystemStream changelogStream = storeChangelogs.get(storeName);
        SystemStreamPartition ssp = new SystemStreamPartition(changelogStream, taskModel.getChangelogPartition());
        SystemAdmin admin = systemAdmins.getSystemAdmin(changelogStream.getSystem());
        validateRestoreOffsets(offsets, admin);

        SystemConsumer consumer = storeConsumers.get(storeName);
        String oldestOffset = currentChangelogOffsets.get(ssp).getOldestOffset();

        // When the requested starting offset equals the oldest offset (e.g. for full restore), start from
        // the oldest offset (inclusive); otherwise start from the offset after the requested one.
        boolean startsAtOldest = admin.offsetComparator(offsets.startingOffset, oldestOffset) == 0;
        String startingOffset = startsAtOldest
            ? oldestOffset
            : admin.getOffsetsAfter(ImmutableMap.of(ssp, offsets.startingOffset)).get(ssp);

        LOG.info("Registering starting offset: {} for changelog ssp: {}", startingOffset, ssp);
        consumer.register(ssp, startingOffset);
    }
}
use of org.apache.samza.system.SystemConsumer in project samza by apache.
the class TransactionalStateTaskRestoreManager method restore.
/**
 * Asynchronously restores every store listed in {@code storeActions.storesToRestore} from its changelog.
 *
 * <p>The work is submitted to {@code restoreExecutor}; within that task the stores are restored one after
 * another. If the restoring thread is interrupted, its interrupt flag is restored and a
 * {@link SamzaException} wrapping the {@link InterruptedException} is thrown from the task.
 *
 * @return a future representing the asynchronous restore of all stores
 */
@Override
public CompletableFuture<Void> restore() {
    return CompletableFuture.runAsync(() -> {
        Map<String, RestoreOffsets> storesToRestore = storeActions.storesToRestore;
        for (Map.Entry<String, RestoreOffsets> entry : storesToRestore.entrySet()) {
            String storeName = entry.getKey();
            String endOffset = entry.getValue().endingOffset;
            SystemStream systemStream = storeChangelogs.get(storeName);
            SystemAdmin systemAdmin = systemAdmins.getSystemAdmin(systemStream.getSystem());
            SystemConsumer systemConsumer = storeConsumers.get(storeName);
            SystemStreamPartition changelogSSP = new SystemStreamPartition(systemStream, taskModel.getChangelogPartition());
            ChangelogSSPIterator changelogSSPIterator = new ChangelogSSPIterator(
                systemConsumer,
                changelogSSP,
                endOffset,
                systemAdmin,
                true,
                currentChangelogOffsets.get(changelogSSP).getNewestOffset());
            StorageEngine taskStore = storeEngines.get(storeName);
            LOG.info("Restoring store: {} for task: {}", storeName, taskModel.getTaskName());
            try {
                taskStore.restore(changelogSSPIterator);
            } catch (InterruptedException e) {
                // Catching InterruptedException clears the thread's interrupt flag; set it again so the
                // interruption remains observable after the exception is translated.
                Thread.currentThread().interrupt();
                String msg = String.format("Interrupted while restoring store: %s for task: %s", storeName, taskModel.getTaskName().getTaskName());
                // wrap in unchecked exception to throw from lambda
                throw new SamzaException(msg, e);
            }
        }
    }, restoreExecutor);
}
Aggregations