Search in sources :

Example 31 with SystemStreamPartition

use of org.apache.samza.system.SystemStreamPartition in project samza by apache.

the class TestTaskCallbackManager method testUpdateCallbackOutofOrder.

/**
 * Feeds three callbacks to the callback manager in reverse sequence order (2, 1, 0).
 * The test expects nothing to be returned for sequence numbers 2 and 1, and expects
 * all three callbacks, in ascending sequence order, once sequence number 0 is supplied.
 */
@Test
public void testUpdateCallbackOutofOrder() {
    final TaskName task = new TaskName("Partition 0");
    final SystemStreamPartition ssp = new SystemStreamPartition("kafka", "topic", new Partition(0));
    final ReadableCoordinator coordinator = new ReadableCoordinator(task);

    // Sequence number 2 arrives first: nothing should be returned yet.
    TaskCallbackImpl seq2 = new TaskCallbackImpl(listener, task, new IncomingMessageEnvelope(ssp, "2", null, null), coordinator, 2, 0);
    List<TaskCallbackImpl> released = callbackManager.updateCallback(seq2);
    assertTrue(released.isEmpty());

    // Sequence number 1 arrives next: still nothing returned.
    TaskCallbackImpl seq1 = new TaskCallbackImpl(listener, task, new IncomingMessageEnvelope(ssp, "1", null, null), coordinator, 1, 0);
    released = callbackManager.updateCallback(seq1);
    assertTrue(released.isEmpty());

    // Sequence number 0 arrives last: all three callbacks are expected back.
    TaskCallbackImpl seq0 = new TaskCallbackImpl(listener, task, new IncomingMessageEnvelope(ssp, "0", null, null), coordinator, 0, 0);
    released = callbackManager.updateCallback(seq0);
    assertEquals(3, released.size());

    // Each returned callback must sit at the index equal to its sequence number,
    // and its envelope offset is the decimal string of that same number.
    for (int seqNum = 0; seqNum < 3; seqNum++) {
        TaskCallbackImpl current = released.get(seqNum);
        assertTrue(current.matchSeqNum(seqNum));
        assertEquals(ssp, current.envelope.getSystemStreamPartition());
        assertEquals(String.valueOf(seqNum), current.envelope.getOffset());
    }
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 32 with SystemStreamPartition

use of org.apache.samza.system.SystemStreamPartition in project samza by apache.

the class TestAsyncRunLoop method testEndOfStreamOffsetManagement.

/**
 * Drives an {@link AsyncRunLoop} over two tasks whose mocked consumer returns two
 * regular messages followed by an end-of-stream envelope, all on {@code ssp2}.
 * The starting offset of {@code ssp1} is mocked as
 * {@link IncomingMessageEnvelope#END_OF_STREAM_OFFSET}.
 *
 * <p>The test is currently disabled (its {@code @Test} annotation is commented out)
 * and it makes no assertions; it only runs the loop.
 *
 * @throws Exception if setting up or running the loop fails
 */
// TODO: Add assertions.
//@Test
public void testEndOfStreamOffsetManagement() throws Exception {
    // Original author's note: explicitly configure to disable commits inside process
    // or window calls and invoke commit from end of stream.
    TestTask mockStreamTask1 = new TestTask(true, false, false, null);
    TestTask mockStreamTask2 = new TestTask(true, false, false, null);

    SystemStreamPartition ssp1 = new SystemStreamPartition("system1", "stream1", new Partition(1));
    SystemStreamPartition ssp2 = new SystemStreamPartition("system1", "stream2", new Partition(2));

    // Everything the mocked consumer hands out belongs to ssp2 and ends with end-of-stream.
    List<IncomingMessageEnvelope> messageList = new ArrayList<>();
    messageList.add(new IncomingMessageEnvelope(ssp2, "1", "key1", "message1"));
    messageList.add(new IncomingMessageEnvelope(ssp2, "2", "key1", "message1"));
    messageList.add(IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp2));
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> sspMap = new HashMap<>();
    sspMap.put(ssp2, messageList);

    SystemConsumer mockConsumer = mock(SystemConsumer.class);
    when(mockConsumer.poll(anyObject(), anyLong())).thenReturn(sspMap);
    HashMap<String, SystemConsumer> systemConsumerMap = new HashMap<>();
    systemConsumerMap.put("system1", mockConsumer);
    SystemConsumers consumers = TestSystemConsumers.getSystemConsumers(systemConsumerMap);

    TaskName taskName1 = new TaskName("task1");
    TaskName taskName2 = new TaskName("task2");

    // Stub the offsets each task sees for its partition.
    OffsetManager offsetManager = mock(OffsetManager.class);
    when(offsetManager.getLastProcessedOffset(taskName1, ssp1)).thenReturn(Option.apply("3"));
    when(offsetManager.getLastProcessedOffset(taskName2, ssp2)).thenReturn(Option.apply("0"));
    when(offsetManager.getStartingOffset(taskName1, ssp1)).thenReturn(Option.apply(IncomingMessageEnvelope.END_OF_STREAM_OFFSET));
    when(offsetManager.getStartingOffset(taskName2, ssp2)).thenReturn(Option.apply("1"));

    TaskInstance taskInstance1 = createTaskInstance(mockStreamTask1, taskName1, ssp1, offsetManager, consumers);
    TaskInstance taskInstance2 = createTaskInstance(mockStreamTask2, taskName2, ssp2, offsetManager, consumers);
    Map<TaskName, TaskInstance> tasks = new HashMap<>();
    tasks.put(taskName1, taskInstance1);
    tasks.put(taskName2, taskInstance2);

    taskInstance1.registerConsumers();
    taskInstance2.registerConsumers();
    consumers.start();

    int maxMessagesInFlight = 1;
    AsyncRunLoop runLoop = new AsyncRunLoop(tasks, executor, consumers, maxMessagesInFlight, windowMs, commitMs, callbackTimeoutMs, maxThrottlingDelayMs, containerMetrics, () -> 0L, false);
    runLoop.run();
}
Also used : SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) SystemConsumer(org.apache.samza.system.SystemConsumer) TaskInstance(org.apache.samza.container.TaskInstance) SystemConsumers(org.apache.samza.system.SystemConsumers) TestSystemConsumers(org.apache.samza.system.TestSystemConsumers) HashMap(java.util.HashMap) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) OffsetManager(org.apache.samza.checkpoint.OffsetManager) ArrayList(java.util.ArrayList) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet)

Example 33 with SystemStreamPartition

use of org.apache.samza.system.SystemStreamPartition in project samza by apache.

the class TaskConfigJava method getBroadcastSystemStreams.

/**
 * Collects the distinct {@link SystemStream}s behind the configured broadcast
 * stream partitions.
 *
 * @return an unmodifiable set holding the SystemStream of every configured
 *         broadcast SystemStreamPartition
 */
public Set<SystemStream> getBroadcastSystemStreams() {
    Set<SystemStream> streams = new HashSet<>();
    // Several partitions of one stream map to the same SystemStream; the set keeps one.
    for (SystemStreamPartition partition : getBroadcastSystemStreamPartitions()) {
        SystemStream stream = partition.getSystemStream();
        streams.add(stream);
    }
    return Collections.unmodifiableSet(streams);
}
Also used : SystemStream(org.apache.samza.system.SystemStream) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 34 with SystemStreamPartition

use of org.apache.samza.system.SystemStreamPartition in project samza by apache.

the class TaskConfigJava method getBroadcastSystemStreamPartitions.

/**
 * Get the systemStreamPartitions of the broadcast stream. Each configured entry
 * must be either {@code system.stream#partitionId} (a single partition) or
 * {@code system.stream#[partitionN-partitionM]} (an inclusive range of partitions).
 *
 * <p>If a range's start is greater than its end, a warning is logged and no
 * partition is added for that entry.
 *
 * @return a Set of SystemStreamPartitions
 * @throws IllegalArgumentException if an entry has no {@code #} separator or its
 *         partition segment matches neither the single-partition nor the range pattern
 */
public Set<SystemStreamPartition> getBroadcastSystemStreamPartitions() {
    HashSet<SystemStreamPartition> systemStreamPartitionSet = new HashSet<>();
    List<String> systemStreamPartitions = getList(BROADCAST_INPUT_STREAMS, Collections.<String>emptyList());
    for (String systemStreamPartition : systemStreamPartitions) {
        int hashPosition = systemStreamPartition.indexOf('#');
        if (hashPosition == -1) {
            throw new IllegalArgumentException("incorrect format in " + systemStreamPartition + ". Broadcast stream names should be in the form 'system.stream#partitionId' or 'system.stream#[partitionN-partitionM]'");
        }
        String systemStreamName = systemStreamPartition.substring(0, hashPosition);
        String partitionSegment = systemStreamPartition.substring(hashPosition + 1);
        // Resolved before the segment is validated, exactly as before, so any failure
        // raised by the name lookup still takes precedence over a format error.
        SystemStream systemStream = Util.getSystemStreamFromNames(systemStreamName);
        if (Pattern.matches(BROADCAST_STREAM_PATTERN, partitionSegment)) {
            // Single partition id.
            systemStreamPartitionSet.add(new SystemStreamPartition(systemStream, new Partition(Integer.parseInt(partitionSegment))));
        } else if (Pattern.matches(BROADCAST_STREAM_RANGE_PATTERN, partitionSegment)) {
            // Range: skip the leading bracket, split on the last '-', stop at ']'.
            int dashPosition = partitionSegment.lastIndexOf('-');
            int partitionStart = Integer.parseInt(partitionSegment.substring(1, dashPosition));
            int partitionEnd = Integer.parseInt(partitionSegment.substring(dashPosition + 1, partitionSegment.indexOf(']')));
            if (partitionStart > partitionEnd) {
                LOGGER.warn("The starting partition in stream " + systemStream + " is bigger than the ending Partition. No partition is added");
            }
            // For an inverted range the loop body never runs, so nothing is added.
            for (int i = partitionStart; i <= partitionEnd; i++) {
                systemStreamPartitionSet.add(new SystemStreamPartition(systemStream, new Partition(i)));
            }
        } else {
            throw new IllegalArgumentException("incorrect format in " + systemStreamPartition + ". Broadcast stream names should be in the form 'system.stream#partitionId' or 'system.stream#[partitionN-partitionM]'");
        }
    }
    return systemStreamPartitionSet;
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStream(org.apache.samza.system.SystemStream) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 35 with SystemStreamPartition

use of org.apache.samza.system.SystemStreamPartition in project samza by apache.

the class StorageRecovery method getTaskStorageManagers.

/**
 * Builds one TaskStorageManager per task of every container model and appends each
 * to {@code taskStorageManagers}. A storage engine is created only for stores that
 * have an entry in {@code changeLogSystemStreams}.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void getTaskStorageManagers() {
    StreamMetadataCache metadataCache = new StreamMetadataCache(Util.javaMapAsScalaMap(systemAdmins), 5000, SystemClock.instance());
    for (ContainerModel container : containers.values()) {
        // Note: this map is created once per container and is shared by all of its tasks.
        HashMap<String, StorageEngine> enginesByStore = new HashMap<String, StorageEngine>();
        SamzaContainerContext containerContext = new SamzaContainerContext(container.getProcessorId(), jobConfig, container.getTasks().keySet());
        for (TaskModel task : container.getTasks().values()) {
            HashMap<String, SystemConsumer> consumersByStore = getStoreConsumers();
            for (Entry<String, StorageEngineFactory<?, ?>> factoryEntry : storageEngineFactories.entrySet()) {
                String storeName = factoryEntry.getKey();
                if (!changeLogSystemStreams.containsKey(storeName)) {
                    // No changelog stream for this store: no engine is created for it.
                    continue;
                }
                SystemStreamPartition changelogSsp = new SystemStreamPartition(changeLogSystemStreams.get(storeName), task.getChangelogPartition());
                File storeDir = TaskStorageManager.getStorePartitionDir(storeBaseDir, storeName, task.getTaskName());
                log.info("Got storage engine directory: " + storeDir);
                StorageEngine engine = factoryEntry.getValue().getStorageEngine(
                    storeName,
                    storeDir,
                    (Serde) new ByteSerde(),
                    (Serde) new ByteSerde(),
                    null,
                    new MetricsRegistryMap(),
                    changelogSsp,
                    containerContext);
                enginesByStore.put(storeName, engine);
            }
            TaskStorageManager manager = new TaskStorageManager(
                task.getTaskName(),
                Util.javaMapAsScalaMap(enginesByStore),
                Util.javaMapAsScalaMap(consumersByStore),
                Util.javaMapAsScalaMap(changeLogSystemStreams),
                maxPartitionNumber,
                metadataCache,
                storeBaseDir,
                storeBaseDir,
                task.getChangelogPartition(),
                Util.javaMapAsScalaMap(systemAdmins),
                new StorageConfig(jobConfig).getChangeLogDeleteRetentionsInMs(),
                new SystemClock());
            taskStorageManagers.add(manager);
        }
    }
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SamzaContainerContext(org.apache.samza.container.SamzaContainerContext) SystemConsumer(org.apache.samza.system.SystemConsumer) SystemClock(org.apache.samza.util.SystemClock) HashMap(java.util.HashMap) StorageConfig(org.apache.samza.config.StorageConfig) JavaStorageConfig(org.apache.samza.config.JavaStorageConfig) ContainerModel(org.apache.samza.job.model.ContainerModel) ByteSerde(org.apache.samza.serializers.ByteSerde) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Aggregations

SystemStreamPartition (org.apache.samza.system.SystemStreamPartition): 43; Partition (org.apache.samza.Partition): 29; Test (org.junit.Test): 26; HashMap (java.util.HashMap): 17; HashSet (java.util.HashSet): 17; TaskName (org.apache.samza.container.TaskName): 13; IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope): 13; Config (org.apache.samza.config.Config): 10; Set (java.util.Set): 8; MapConfig (org.apache.samza.config.MapConfig): 7; GenericRecord (org.apache.avro.generic.GenericRecord): 6; ArrayList (java.util.ArrayList): 5; List (java.util.List): 5; SystemStream (org.apache.samza.system.SystemStream): 5; MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap): 4; SystemStreamMetadata (org.apache.samza.system.SystemStreamMetadata): 4; LinkedHashMap (java.util.LinkedHashMap): 3; SamzaException (org.apache.samza.SamzaException): 3; TaskInstance (org.apache.samza.container.TaskInstance): 3; SystemConsumer (org.apache.samza.system.SystemConsumer): 3