Search in sources :

Example 1 with InMemoryOutputDescriptor

use of org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor in project samza by apache.

the class TestRunner method consumeStream.

/**
 * Gets the contents of the output stream represented by {@code outputDescriptor} after {@link TestRunner#run(Duration)}
 * has completed.
 *
 * <p>The stream is polled repeatedly until, for every partition, the number of messages read equals the
 * upcoming offset reported by the system admin for that partition, or until {@code timeout} elapses.
 *
 * @param outputDescriptor describes the stream to be consumed
 * @param timeout maximum amount of time to spend polling the stream
 * @param <StreamMessageType> type of message
 *
 * @return a map whose key is {@code partitionId} and value is messages in partition
 * @throws SamzaException if the calling thread is interrupted while polling
 * @throws IllegalStateException if some partition was not fully consumed before {@code timeout} elapsed
 */
public static <StreamMessageType> Map<Integer, List<StreamMessageType>> consumeStream(InMemoryOutputDescriptor outputDescriptor, Duration timeout) throws SamzaException {
    Preconditions.checkNotNull(outputDescriptor);
    String streamId = outputDescriptor.getStreamId();
    String systemName = outputDescriptor.getSystemName();
    Set<SystemStreamPartition> ssps = new HashSet<>();
    Set<String> streamIds = new HashSet<>();
    streamIds.add(streamId);
    SystemFactory factory = new InMemorySystemFactory();
    Config config = new MapConfig(outputDescriptor.toConfig(), outputDescriptor.getSystemDescriptor().toConfig());
    Map<String, SystemStreamMetadata> metadata = factory.getAdmin(systemName, config).getSystemStreamMetadata(streamIds);
    SystemConsumer consumer = factory.getConsumer(systemName, config, null);
    // NOTE(review): partitions are discovered under the physical name here, while the per-partition
    // message count below is looked up under the stream id; confirm both keys resolve when they differ.
    String name = (String) outputDescriptor.getPhysicalName().orElse(streamId);
    metadata.get(name).getSystemStreamPartitionMetadata().keySet().forEach(partition -> {
        SystemStreamPartition temp = new SystemStreamPartition(systemName, streamId, partition);
        ssps.add(temp);
        // Every partition is registered starting from offset "0".
        consumer.register(temp, "0");
    });
    long deadline = System.currentTimeMillis() + timeout.toMillis();
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> output = new HashMap<>();
    Set<SystemStreamPartition> didNotReachEndOfStream = new HashSet<>(ssps);
    while (System.currentTimeMillis() < deadline) {
        Map<SystemStreamPartition, List<IncomingMessageEnvelope>> currentState;
        try {
            currentState = consumer.poll(ssps, 10);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up the stack can still observe the interruption,
            // and keep the original exception as the cause instead of flattening it into the message.
            Thread.currentThread().interrupt();
            throw new SamzaException("Interrupted while consuming stream " + streamId, e);
        }
        for (Map.Entry<SystemStreamPartition, List<IncomingMessageEnvelope>> entry : currentState.entrySet()) {
            SystemStreamPartition ssp = entry.getKey();
            List<IncomingMessageEnvelope> consumed = output.computeIfAbsent(ssp, k -> new LinkedList<>());
            List<IncomingMessageEnvelope> currentBuffer = entry.getValue();
            int totalMessagesToFetch = Integer.parseInt(metadata.get(streamId).getSystemStreamPartitionMetadata().get(ssp.getPartition()).getUpcomingOffset());
            if (consumed.size() + currentBuffer.size() == totalMessagesToFetch) {
                // This partition is complete: stop tracking it and stop polling it.
                didNotReachEndOfStream.remove(ssp);
                ssps.remove(ssp);
            }
            consumed.addAll(currentBuffer);
        }
        if (didNotReachEndOfStream.isEmpty()) {
            break;
        }
    }
    if (!didNotReachEndOfStream.isEmpty()) {
        throw new IllegalStateException("Could not poll for all system stream partitions");
    }
    return output.entrySet().stream().collect(Collectors.toMap(entry -> entry.getKey().getPartition().getPartitionId(), entry -> entry.getValue().stream().map(e -> (StreamMessageType) e.getMessage()).collect(Collectors.toList())));
}
Also used : InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) LoggerFactory(org.slf4j.LoggerFactory) InMemorySystemProducer(org.apache.samza.system.inmemory.InMemorySystemProducer) SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) FileUtil(org.apache.samza.util.FileUtil) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) SystemConsumer(org.apache.samza.system.SystemConsumer) Duration(java.time.Duration) Map(java.util.Map) StreamTask(org.apache.samza.task.StreamTask) SamzaApplication(org.apache.samza.application.SamzaApplication) InMemoryMetadataStoreFactory(org.apache.samza.metadatastore.InMemoryMetadataStoreFactory) ExternalContext(org.apache.samza.context.ExternalContext) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) AsyncStreamTask(org.apache.samza.task.AsyncStreamTask) StreamDescriptor(org.apache.samza.system.descriptors.StreamDescriptor) Set(java.util.Set) InMemorySystemConfig(org.apache.samza.config.InMemorySystemConfig) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) Collectors(java.util.stream.Collectors) List(java.util.List) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) ApplicationStatus(org.apache.samza.job.ApplicationStatus) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) 
SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) StreamConfig(org.apache.samza.config.StreamConfig) HashSet(java.util.HashSet) SystemStream(org.apache.samza.system.SystemStream) ApplicationConfig(org.apache.samza.config.ApplicationConfig) LinkedList(java.util.LinkedList) LocalApplicationRunner(org.apache.samza.runtime.LocalApplicationRunner) InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) Logger(org.slf4j.Logger) TaskConfig(org.apache.samza.config.TaskConfig) JobPlanner(org.apache.samza.execution.JobPlanner) SystemFactory(org.apache.samza.system.SystemFactory) StreamSpec(org.apache.samza.system.StreamSpec) File(java.io.File) SamzaException(org.apache.samza.SamzaException) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Preconditions(com.google.common.base.Preconditions) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) SystemConsumer(org.apache.samza.system.SystemConsumer) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) SystemFactory(org.apache.samza.system.SystemFactory) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) InMemorySystemConfig(org.apache.samza.config.InMemorySystemConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) StreamConfig(org.apache.samza.config.StreamConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) TaskConfig(org.apache.samza.config.TaskConfig) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) SamzaException(org.apache.samza.SamzaException) List(java.util.List) LinkedList(java.util.LinkedList) MapConfig(org.apache.samza.config.MapConfig) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) HashSet(java.util.HashSet) 
SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) Map(java.util.Map) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 2 with InMemoryOutputDescriptor

use of org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor in project samza by apache.

the class TestContext method testSamzaJobFailureForSyncTask.

/**
 * Feeds doubles into a job whose task logic expects integers, so the run is expected to fail
 * with a SamzaException.
 */
@Test(expected = SamzaException.class)
public void testSamzaJobFailureForSyncTask() {
    InMemorySystemDescriptor systemDescriptor = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<Double> doublesInput = systemDescriptor.getInputDescriptor("doubles", new NoOpSerde<Double>());
    InMemoryOutputDescriptor outputStream = systemDescriptor.getOutputDescriptor("output", new NoOpSerde<>());
    List<Double> doubles = Arrays.asList(1.2, 2.3, 3.33, 4.5);

    TestRunner
        .of(MyStreamTestTask.class)
        .addInputStream(doublesInput, doubles)
        .addOutputStream(outputStream, 1)
        .addExternalContext(new TestContext(10))
        .run(Duration.ofSeconds(1));
}
Also used : InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Test(org.junit.Test)

Example 3 with InMemoryOutputDescriptor

use of org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor in project samza by apache.

the class AsyncStreamTaskIntegrationTest method testAsyncTaskWithSinglePartition.

/**
 * Runs MyAsyncStreamTask over a single-partition in-memory stream and checks that partition 0 of the
 * output stream contains the expected values in order.
 */
@Test
public void testAsyncTaskWithSinglePartition() throws Exception {
    List<Integer> input = Arrays.asList(1, 2, 3, 4, 5);
    List<Integer> expected = Arrays.asList(10, 20, 30, 40, 50);

    InMemorySystemDescriptor systemDescriptor = new InMemorySystemDescriptor("async-test");
    InMemoryInputDescriptor<Integer> intsInput = systemDescriptor.getInputDescriptor("ints", new NoOpSerde<Integer>());
    InMemoryOutputDescriptor intsOutput = systemDescriptor.getOutputDescriptor("ints-out", new NoOpSerde<>());

    TestRunner
        .of(MyAsyncStreamTask.class)
        .addInputStream(intsInput, input)
        .addOutputStream(intsOutput, 1)
        .run(Duration.ofSeconds(2));

    Map<Integer, List<Integer>> consumedByPartition = TestRunner.consumeStream(intsOutput, Duration.ofMillis(1000));
    Assert.assertThat(consumedByPartition.get(0), IsIterableContainingInOrder.contains(expected.toArray()));
}
Also used : InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Test(org.junit.Test)

Example 4 with InMemoryOutputDescriptor

use of org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor in project samza by apache.

the class StreamApplicationIntegrationTest method testStatefulJoinWithLocalTable.

/**
 * Runs PageViewProfileViewJoinApplication over in-memory page-view and profile streams (the profile
 * stream is marked as bootstrap) and checks that partition 0 of each output stream holds 10 messages.
 */
@Test
public void testStatefulJoinWithLocalTable() {
    Random random = new Random();
    // Each generated record is keyed by a randomly chosen entry of PAGEKEYS.
    List<KV<String, TestTableData.PageView>> pageViews = Arrays.stream(TestTableData.generatePageViews(10))
        .map(pageView -> KV.of(PAGEKEYS[random.nextInt(PAGEKEYS.length)], pageView))
        .collect(Collectors.toList());
    List<KV<String, TestTableData.Profile>> profiles = Arrays.stream(TestTableData.generateProfiles(10))
        .map(profile -> KV.of(PAGEKEYS[random.nextInt(PAGEKEYS.length)], profile))
        .collect(Collectors.toList());

    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<KV<String, TestTableData.PageView>> pageViewStreamDesc =
        isd.getInputDescriptor("PageView", new NoOpSerde<KV<String, TestTableData.PageView>>());
    InMemoryInputDescriptor<KV<String, TestTableData.Profile>> profileStreamDesc =
        isd.getInputDescriptor("Profile", new NoOpSerde<KV<String, TestTableData.Profile>>()).shouldBootstrap();
    InMemoryOutputDescriptor<TestTableData.EnrichedPageView> outputStreamDesc =
        isd.getOutputDescriptor("EnrichedPageView", new NoOpSerde<>());
    InMemoryOutputDescriptor<String> joinKeysDescriptor =
        isd.getOutputDescriptor("JoinPageKeys", new NoOpSerde<>());

    TestRunner
        .of(new PageViewProfileViewJoinApplication())
        .addInputStream(pageViewStreamDesc, pageViews)
        .addInputStream(profileStreamDesc, profiles)
        .addOutputStream(outputStreamDesc, 1)
        .addOutputStream(joinKeysDescriptor, 1)
        .run(Duration.ofSeconds(2));

    int enrichedCount = TestRunner.consumeStream(outputStreamDesc, Duration.ofSeconds(1)).get(0).size();
    Assert.assertEquals(10, enrichedCount);
    int joinKeyCount = TestRunner.consumeStream(joinKeysDescriptor, Duration.ofSeconds(1)).get(0).size();
    Assert.assertEquals(10, joinKeyCount);
}
Also used : RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) PageView(org.apache.samza.test.controlmessages.TestData.PageView) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) TestTableData(org.apache.samza.test.table.TestTableData) SystemStream(org.apache.samza.system.SystemStream) Duration(java.time.Duration) KV(org.apache.samza.operators.KV) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) Logger(org.slf4j.Logger) TestData(org.apache.samza.test.controlmessages.TestData) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) Test(org.junit.Test) Collectors(java.util.stream.Collectors) SamzaException(org.apache.samza.SamzaException) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) PageViewToProfileJoinFunction(org.apache.samza.test.table.PageViewToProfileJoinFunction) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) OutputStream(org.apache.samza.operators.OutputStream) 
KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Random(java.util.Random) NoOpSerde(org.apache.samza.serializers.NoOpSerde) TestTableData(org.apache.samza.test.table.TestTableData) Test(org.junit.Test)

Example 5 with InMemoryOutputDescriptor

use of org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor in project samza by apache.

the class AsyncStreamTaskIntegrationTest method testAsyncTaskWithMultiplePartitionMultithreaded.

/**
 * Runs MyAsyncStreamTask with "task.max.concurrency" set to "4" over partitioned input produced by
 * genData, writing to a 5-partition output stream, and checks the output against the expected
 * per-partition data in any order.
 */
@Test
public void testAsyncTaskWithMultiplePartitionMultithreaded() throws Exception {
    Map<Integer, List<KV>> inputByPartition = new HashMap<>();
    Map<Integer, List<Integer>> expectedByPartition = new HashMap<>();
    genData(inputByPartition, expectedByPartition);

    InMemorySystemDescriptor systemDescriptor = new InMemorySystemDescriptor("async-test");
    InMemoryInputDescriptor<KV> intsInput = systemDescriptor.getInputDescriptor("ints", new NoOpSerde<>());
    InMemoryOutputDescriptor intsOutput = systemDescriptor.getOutputDescriptor("ints-out", new NoOpSerde<>());

    TestRunner
        .of(MyAsyncStreamTask.class)
        .addInputStream(intsInput, inputByPartition)
        .addOutputStream(intsOutput, 5)
        .addConfig("task.max.concurrency", "4")
        .run(Duration.ofSeconds(2));

    StreamAssert.containsInAnyOrder(expectedByPartition, intsOutput, Duration.ofMillis(1000));
}
Also used : InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Test(org.junit.Test)

Aggregations

InMemoryOutputDescriptor (org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor)8 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)8 Test (org.junit.Test)7 List (java.util.List)4 KV (org.apache.samza.operators.KV)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Duration (java.time.Duration)2 Collectors (java.util.stream.Collectors)2 SamzaException (org.apache.samza.SamzaException)2 OutgoingMessageEnvelope (org.apache.samza.system.OutgoingMessageEnvelope)2 Preconditions (com.google.common.base.Preconditions)1 File (java.io.File)1 Arrays (java.util.Arrays)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 Random (java.util.Random)1 Set (java.util.Set)1 RandomStringUtils (org.apache.commons.lang3.RandomStringUtils)1