Search in sources :

Example 16 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class TestRemoteTableEndToEnd method testSendToWithDefaultsAndUpdateOnly.

// This test is expected to fail: the pipeline sends to the output table with UpdateOptions.UPDATE_ONLY
// and the run is expected to raise the following exception:
// org.apache.samza.SamzaException: Put default failed for update as the UpdateOptions was set to UPDATE_ONLY.
// Please use UpdateOptions.UPDATE_WITH_DEFAULTS instead.
@Test(expected = SamzaException.class)
public void testSendToWithDefaultsAndUpdateOnly() throws Exception {
    String name = "testSendToWithDefaultsAndUpdateOnly";
    final String serializedProfiles = Base64Serializer.serialize(generateProfiles(30));
    final RateLimiter rateLimiter = mock(RateLimiter.class, withSettings().serializable());
    // every read is charged exactly one credit
    final TableRateLimiter.CreditFunction unitCredit = (key, value, args) -> 1;

    final StreamApplication application = descriptor -> {
        // read-only profile table used as the join side
        final RemoteTableDescriptor profileTableDesc =
            new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
                .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
                .withRateLimiter(rateLimiter, unitCredit, null);
        // destination table that receives the update messages
        final RemoteTableDescriptor enrichedTableDesc =
            new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
                .withReadFunction(new NoOpTableReadFunction<>())
                .withReadRateLimiterDisabled()
                .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(name, false))
                .withWriteRateLimit(1000);

        // counters to count puts and updates
        COUNTERS.put(name + "-put", new AtomicInteger());
        COUNTERS.put(name + "-update", new AtomicInteger());

        final Table<KV<Integer, Profile>> enrichedTable = descriptor.getTable(enrichedTableDesc);
        final Table<KV<Integer, Profile>> profileTable = descriptor.getTable(profileTableDesc);

        final DelegatingSystemDescriptor systemDesc = new DelegatingSystemDescriptor("test");
        final GenericInputDescriptor<PageView> pageViewInput =
            systemDesc.getInputDescriptor("PageView", new NoOpSerde<>());

        // key by member id, join against profiles, then send each result as an update-only message
        descriptor.getInputStream(pageViewInput)
            .map(view -> new KV<>(view.getMemberId(), view))
            .join(profileTable, new PageViewToProfileJoinFunction())
            .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
            .sendTo(enrichedTable, UpdateOptions.UPDATE_ONLY);
    };

    int pageViewCount = 15;
    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> inMemoryInput =
        inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
    Map<Integer, List<PageView>> partitionedPageViews =
        TestTableData.generatePartitionedPageViews(pageViewCount, 1);

    TestRunner.of(application)
        .addInputStream(inMemoryInput, partitionedPageViews)
        .run(Duration.ofSeconds(10));
}
Also used : GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) ObjectInputStream(java.io.ObjectInputStream) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RemoteTable(org.apache.samza.table.remote.RemoteTable) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Counter(org.apache.samza.metrics.Counter) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Context(org.apache.samza.context.Context) RecordNotFoundException(org.apache.samza.table.RecordNotFoundException) TestRunner(org.apache.samza.test.framework.TestRunner) Matchers.any(org.mockito.Matchers.any) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData.generateProfiles(org.apache.samza.test.table.TestTableData.generateProfiles) CacheBuilder(com.google.common.cache.CacheBuilder) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) 
UpdateMessage(org.apache.samza.operators.UpdateMessage) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Profile(org.apache.samza.test.table.TestTableData.Profile) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) Base64Serializer(org.apache.samza.test.util.Base64Serializer) MockContext(org.apache.samza.context.MockContext) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) ExpectedException(org.junit.rules.ExpectedException) Timer(org.apache.samza.metrics.Timer) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) Matchers.anyString(org.mockito.Matchers.anyString) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) List(java.util.List) ArrayList(java.util.ArrayList) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) RemoteTable(org.apache.samza.table.remote.RemoteTable) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) KV(org.apache.samza.operators.KV) RateLimiter(org.apache.samza.util.RateLimiter) 
TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) Test(org.junit.Test)

Example 17 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project beam by apache.

the class TranslationContext method createDummyStreamDescriptor.

/**
 * Creates an input descriptor for a stream named {@code id} on an {@link InMemorySystemFactory}
 * backed system, creates that stream, and sends three messages to it: one "dummy" element, a
 * watermark at {@code BoundedWindow.TIMESTAMP_MAX_VALUE} and an end-of-stream message.
 *
 * <p>The dummy stream created will only be used in Beam tests.
 *
 * @param id used as the system name, the stream id and the physical stream name
 * @return the input descriptor of the dummy stream
 */
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
    final GenericSystemDescriptor system = new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
    final GenericInputDescriptor<OpMessage<String>> input = system.getInputDescriptor(id, new NoOpSerde<>());
    input.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);

    final Config systemConfig = new MapConfig(input.toConfig(), system.toConfig());
    final SystemFactory systemFactory = new InMemorySystemFactory();

    // the stream has to exist before anything is produced to it
    final StreamSpec spec = new StreamSpec(id, id, id, 1);
    systemFactory.getAdmin(id, systemConfig).createStream(spec);

    final SystemProducer systemProducer = systemFactory.getProducer(id, systemConfig, null);
    final SystemStream target = new SystemStream(id, id);
    final Consumer<Object> emit =
        payload -> systemProducer.send(id, new OutgoingMessageEnvelope(target, 0, null, payload));

    emit.accept(OpMessage.ofElement(WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant())));
    emit.accept(new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
    emit.accept(new EndOfStreamMessage(null));
    return input;
}
Also used : InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) TransformInputs(org.apache.beam.runners.core.construction.TransformInputs) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) TupleTag(org.apache.beam.sdk.values.TupleTag) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) OutputDescriptor(org.apache.samza.system.descriptors.OutputDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) Logger(org.slf4j.Logger) Set(java.util.Set) SystemFactory(org.apache.samza.system.SystemFactory) StreamSpec(org.apache.samza.system.StreamSpec) UUID(java.util.UUID) PCollection(org.apache.beam.sdk.values.PCollection) HashIdGenerator(org.apache.beam.runners.samza.util.HashIdGenerator) Consumer(java.util.function.Consumer) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) List(java.util.List) PValue(org.apache.beam.sdk.values.PValue) SystemProducer(org.apache.samza.system.SystemProducer) 
StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) Config(org.apache.samza.config.Config) Collections(java.util.Collections) OutputStream(org.apache.samza.operators.OutputStream) StreamSpec(org.apache.samza.system.StreamSpec) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) SystemFactory(org.apache.samza.system.SystemFactory) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) SystemProducer(org.apache.samza.system.SystemProducer) SystemStream(org.apache.samza.system.SystemStream) Instant(org.joda.time.Instant) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MapConfig(org.apache.samza.config.MapConfig) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory)

Example 18 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class TestOperatorImplGraph method testJoinChain.

/**
 * Builds a two-input stream-stream join and checks that the resulting {@link OperatorImplGraph}
 * initializes the join function once, registers distinct left/right partial join operators that
 * share one operator spec, and produces a single join result once a message has been delivered on
 * each input.
 */
@Test
public void testJoinChain() {
    String inputStreamId1 = "input1";
    String inputStreamId2 = "input2";
    String inputSystem = "input-system";
    String inputPhysicalName1 = "input-stream1";
    String inputPhysicalName2 = "input-stream2";
    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "jobName");
    configs.put(JobConfig.JOB_ID, "jobId");
    StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputPhysicalName1);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputPhysicalName2);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);
    // Integer.valueOf replaces the deprecated boxing constructor; the key is only compared via equals.
    Integer joinKey = Integer.valueOf(1);
    // both sides map every message to the same key so that the left and right messages match
    Function<Object, Integer> keyFn = (Function & Serializable) m -> joinKey;
    JoinFunction testJoinFunction = new TestJoinFunction("jobName-jobId-join-j1", (BiFunction & Serializable) (m1, m2) -> KV.of(m1, m2), keyFn, keyFn);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor1 = sd.getInputDescriptor(inputStreamId1, mock(Serde.class));
        GenericInputDescriptor inputDescriptor2 = sd.getInputDescriptor(inputStreamId2, mock(Serde.class));
        MessageStream<Object> inputStream1 = appDesc.getInputStream(inputDescriptor1);
        MessageStream<Object> inputStream2 = appDesc.getInputStream(inputDescriptor2);
        inputStream1.join(inputStream2, testJoinFunction, mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j1");
    }, config);
    TaskName mockTaskName = mock(TaskName.class);
    TaskModel taskModel = mock(TaskModel.class);
    when(taskModel.getTaskName()).thenReturn(mockTaskName);
    when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
    // the stores are looked up under the join operator id suffixed with "-L" / "-R"
    KeyValueStore mockLeftStore = mock(KeyValueStore.class);
    when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-L"))).thenReturn(mockLeftStore);
    KeyValueStore mockRightStore = mock(KeyValueStore.class);
    when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-R"))).thenReturn(mockRightStore);
    OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
    // verify that join function is initialized once.
    assertEquals(1, TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1").numInitCalled);
    InputOperatorImpl inputOpImpl1 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName1));
    InputOperatorImpl inputOpImpl2 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName2));
    PartialJoinOperatorImpl leftPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl1.registeredOperators.iterator().next();
    PartialJoinOperatorImpl rightPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl2.registeredOperators.iterator().next();
    // both halves share one operator spec but must be separate operator instances
    assertEquals(leftPartialJoinOpImpl.getOperatorSpec(), rightPartialJoinOpImpl.getOperatorSpec());
    assertNotSame(leftPartialJoinOpImpl, rightPartialJoinOpImpl);
    // verify that left partial join operator calls getFirstKey
    Object mockLeftMessage = mock(Object.class);
    long currentTimeMillis = System.currentTimeMillis();
    when(mockLeftStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockLeftMessage, currentTimeMillis));
    IncomingMessageEnvelope leftMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockLeftMessage);
    inputOpImpl1.onMessage(leftMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
    // verify that right partial join operator calls getSecondKey
    Object mockRightMessage = mock(Object.class);
    when(mockRightStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockRightMessage, currentTimeMillis));
    IncomingMessageEnvelope rightMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockRightMessage);
    inputOpImpl2.onMessage(rightMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
    // verify that the join function apply is called with the correct messages on match
    assertEquals(1, ((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.size());
    KV joinResult = (KV) ((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.iterator().next();
    assertEquals(mockLeftMessage, joinResult.getKey());
    assertEquals(mockRightMessage, joinResult.getValue());
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) 
SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) Serializable(java.io.Serializable) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) 
MessageCollector(org.apache.samza.task.MessageCollector) MapConfig(org.apache.samza.config.MapConfig) SystemStream(org.apache.samza.system.SystemStream) TaskCoordinator(org.apache.samza.task.TaskCoordinator) KV(org.apache.samza.operators.KV) BiFunction(java.util.function.BiFunction) TaskName(org.apache.samza.container.TaskName) JoinFunction(org.apache.samza.operators.functions.JoinFunction) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 19 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class TestOperatorImplGraph method testPartitionByChain.

/**
 * Builds an input -> partitionBy -> sendTo pipeline and checks the operator chain in the resulting
 * {@link OperatorImplGraph}: the original input feeds a single partition-by operator with no
 * registered successors, and the intermediate stream's input operator feeds a single send-to
 * operator with no registered successors.
 */
@Test
public void testPartitionByChain() {
    String sourceStreamId = "input";
    String sourceSystem = "input-system";
    String sourcePhysicalName = "input-stream";
    String sinkStreamId = "output";
    String sinkSystem = "output-system";
    String sinkPhysicalName = "output-stream";
    String repartitionStreamId = "jobName-jobId-partition_by-p1";
    String repartitionSystem = "intermediate-system";

    HashMap<String, String> jobConfigs = new HashMap<>();
    jobConfigs.put(JobConfig.JOB_NAME, "jobName");
    jobConfigs.put(JobConfig.JOB_ID, "jobId");
    jobConfigs.put(JobConfig.JOB_DEFAULT_SYSTEM, repartitionSystem);
    StreamTestUtils.addStreamConfigs(jobConfigs, sourceStreamId, sourceSystem, sourcePhysicalName);
    StreamTestUtils.addStreamConfigs(jobConfigs, sinkStreamId, sinkSystem, sinkPhysicalName);
    Config jobConfig = new MapConfig(jobConfigs);
    when(this.context.getJobContext().getConfig()).thenReturn(jobConfig);

    StreamApplicationDescriptorImpl appSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor sourceSystemDesc = new GenericSystemDescriptor(sourceSystem, "mockFactoryClass");
        GenericSystemDescriptor sinkSystemDesc = new GenericSystemDescriptor(sinkSystem, "mockFactoryClass");
        GenericInputDescriptor sourceDesc = sourceSystemDesc.getInputDescriptor(sourceStreamId, mock(Serde.class));
        GenericOutputDescriptor sinkDesc =
            sinkSystemDesc.getOutputDescriptor(sinkStreamId, KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)));
        MessageStream<Object> source = appDesc.getInputStream(sourceDesc);
        OutputStream<KV<Integer, String>> sink = appDesc.getOutputStream(sinkDesc);
        source
            .partitionBy(Object::hashCode, Object::toString, KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)), "p1")
            .sendTo(sink);
    }, jobConfig);

    // job model with one container ("0") holding one task ("task 0") that owns no partitions
    JobModel mockJobModel = mock(JobModel.class);
    ContainerModel mockContainerModel = mock(ContainerModel.class);
    TaskModel mockTaskModel = mock(TaskModel.class);
    when(mockJobModel.getContainers()).thenReturn(Collections.singletonMap("0", mockContainerModel));
    when(mockContainerModel.getTasks()).thenReturn(Collections.singletonMap(new TaskName("task 0"), mockTaskModel));
    when(mockTaskModel.getSystemStreamPartitions()).thenReturn(Collections.emptySet());
    when(((TaskContextImpl) this.context.getTaskContext()).getJobModel()).thenReturn(mockJobModel);

    OperatorImplGraph implGraph = new OperatorImplGraph(appSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));

    // original input -> partitionBy
    InputOperatorImpl sourceOp = implGraph.getInputOperator(new SystemStream(sourceSystem, sourcePhysicalName));
    assertEquals(1, sourceOp.registeredOperators.size());
    OperatorImpl partitionByOp = (PartitionByOperatorImpl) sourceOp.registeredOperators.iterator().next();
    // is terminal but paired with an input operator
    assertEquals(0, partitionByOp.registeredOperators.size());
    assertEquals(OpCode.PARTITION_BY, partitionByOp.getOperatorSpec().getOpCode());

    // intermediate (repartitioned) input -> sendTo
    InputOperatorImpl repartitionedSourceOp =
        implGraph.getInputOperator(new SystemStream(repartitionSystem, repartitionStreamId));
    assertEquals(1, repartitionedSourceOp.registeredOperators.size());
    OperatorImpl sendToOp = (OutputOperatorImpl) repartitionedSourceOp.registeredOperators.iterator().next();
    assertEquals(0, sendToOp.registeredOperators.size());
    assertEquals(OpCode.SEND_TO, sendToOp.getOperatorSpec().getOpCode());
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StringSerde(org.apache.samza.serializers.StringSerde) HashMap(java.util.HashMap) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) IntegerSerde(org.apache.samza.serializers.IntegerSerde) ContainerModel(org.apache.samza.job.model.ContainerModel) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) JobModel(org.apache.samza.job.model.JobModel) MapConfig(org.apache.samza.config.MapConfig) SystemStream(org.apache.samza.system.SystemStream) KV(org.apache.samza.operators.KV) TaskContextImpl(org.apache.samza.context.TaskContextImpl) TaskName(org.apache.samza.container.TaskName) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) Test(org.junit.Test)

Example 20 with GenericInputDescriptor

use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.

the class TestOperatorImplGraph method testMergeChain.

/**
 * Builds a pipeline in which one input is split into a filtered and a mapped stream that are then
 * merged and mapped again, and checks that the {@link OperatorImplGraph} contains exactly one merge
 * operator, that a map operator is its only registered successor, and that the map function
 * downstream of the merge is initialized once.
 */
@Test
public void testMergeChain() {
    String inputStreamId = "input";
    String inputSystem = "input-system";
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor = sd.getInputDescriptor(inputStreamId, mock(Serde.class));
        MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
        MessageStream<Object> stream1 = inputStream.filter(mock(FilterFunction.class));
        MessageStream<Object> stream2 = inputStream.map(mock(MapFunction.class));
        stream1.merge(Collections.singleton(stream2)).map(new TestMapFunction<Object, Object>("test-map-1", (Function & Serializable) m -> m));
    }, getConfig());
    TaskName mockTaskName = mock(TaskName.class);
    TaskModel taskModel = mock(TaskModel.class);
    when(taskModel.getTaskName()).thenReturn(mockTaskName);
    when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
    OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
    // gather the operators reachable from the input operators into a set, so shared operators are counted once
    Set<OperatorImpl> opSet = opImplGraph.getAllInputOperators().stream().collect(HashSet::new, (s, op) -> addOperatorRecursively(s, op), HashSet::addAll);
    Object[] mergeOps = opSet.stream().filter(op -> op.getOperatorSpec().getOpCode() == OpCode.MERGE).toArray();
    assertEquals(1, mergeOps.length);
    assertEquals(1, ((OperatorImpl) mergeOps[0]).registeredOperators.size());
    OperatorImpl mapOp = (OperatorImpl) ((OperatorImpl) mergeOps[0]).registeredOperators.iterator().next();
    // expected value goes first so that a failure message reads correctly
    assertEquals(OpCode.MAP, mapOp.getOperatorSpec().getOpCode());
    // verify that the DAG after merge is only traversed & initialized once
    assertEquals(1, TestMapFunction.getInstanceByTaskName(mockTaskName, "test-map-1").numInitCalled);
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) 
ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) FilterFunction(org.apache.samza.operators.functions.FilterFunction) MapFunction(org.apache.samza.operators.functions.MapFunction) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TaskName(org.apache.samza.container.TaskName) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)31 Test (org.junit.Test)29 NoOpSerde (org.apache.samza.serializers.NoOpSerde)23 KV (org.apache.samza.operators.KV)20 KVSerde (org.apache.samza.serializers.KVSerde)20 GenericSystemDescriptor (org.apache.samza.system.descriptors.GenericSystemDescriptor)20 HashMap (java.util.HashMap)19 Duration (java.time.Duration)18 Map (java.util.Map)17 MapConfig (org.apache.samza.config.MapConfig)17 Serde (org.apache.samza.serializers.Serde)17 List (java.util.List)16 IntegerSerde (org.apache.samza.serializers.IntegerSerde)16 ArrayList (java.util.ArrayList)14 Function (java.util.function.Function)12 StreamApplication (org.apache.samza.application.StreamApplication)12 JobConfig (org.apache.samza.config.JobConfig)12 Collectors (java.util.stream.Collectors)11 Config (org.apache.samza.config.Config)11 StringSerde (org.apache.samza.serializers.StringSerde)11