Search in sources :

Example 6 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestExecutionPlanner method createStreamGraphWithInvalidStreamTableJoinWithSideInputs.

private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
    /**
     * Example stream-table join that is invalid due to disagreement in partition count between the
     * stream behind table t and another joined stream. Table t is configured with input2 (16) as
     * side-input stream.
     *
     *                   join-table t -> output1 (8)
     *                         |
     *    input1 (64) —————————
     */
    return new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
        OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
        TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", new KVSerde(new StringSerde(), new StringSerde())).withSideInputs(Arrays.asList("input2")).withSideInputsProcessor(mock(SideInputsProcessor.class));
        Table table = appDesc.getTable(tableDescriptor);
        messageStream1.join(table, mock(StreamTableJoinFunction.class)).sendTo(output1);
    }, config);
}
Also used : KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor)

Example 7 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestJobNodeConfigurationGenerator method testStreamApplicationWithTableAndSideInput.

@Test
public void testStreamApplicationWithTableAndSideInput() {
    mockStreamAppDesc = new StreamApplicationDescriptorImpl(getRepartitionJoinStreamApplication(), mockConfig);
    // add table to the RepartitionJoinStreamApplication
    GenericInputDescriptor<KV<String, Object>> sideInput1 = inputSystemDescriptor.getInputDescriptor("sideInput1", defaultSerde);
    BaseTableDescriptor mockTableDescriptor = new MockLocalTableDescriptor("testTable", defaultSerde).withSideInputs(Arrays.asList(sideInput1.getStreamId())).withSideInputsProcessor(mock(SideInputsProcessor.class, withSettings().serializable())).withConfig("mock.table.provider.config", "mock.config.value");
    // add side input and terminate at table in the appplication
    mockStreamAppDesc.getInputStream(sideInput1).sendTo(mockStreamAppDesc.getTable(mockTableDescriptor));
    StreamEdge sideInputEdge = new StreamEdge(new StreamSpec(sideInput1.getStreamId(), "sideInput1", inputSystemDescriptor.getSystemName()), false, false, mockConfig);
    // need to put the sideInput related stream configuration to the original config
    // TODO: this is confusing since part of the system and stream related configuration is generated outside the JobGraphConfigureGenerator
    // It would be nice if all system and stream related configuration is generated in one place and only intermediate stream
    // configuration is generated by JobGraphConfigureGenerator
    Map<String, String> configs = new HashMap<>(mockConfig);
    configs.putAll(sideInputEdge.generateConfig());
    mockConfig = spy(new MapConfig(configs));
    configureJobNode(mockStreamAppDesc);
    // create the JobGraphConfigureGenerator and generate the jobConfig for the jobNode
    JobNodeConfigurationGenerator configureGenerator = new JobNodeConfigurationGenerator();
    JobConfig jobConfig = configureGenerator.generateJobConfig(mockJobNode, "testJobGraphJson");
    Config expectedJobConfig = getExpectedJobConfig(mockConfig, mockJobNode.getInEdges());
    validateJobConfig(expectedJobConfig, jobConfig);
    Map<String, Serde> deserializedSerdes = validateAndGetDeserializedSerdes(jobConfig, 5);
    validateTableConfigure(jobConfig, deserializedSerdes, mockTableDescriptor);
}
Also used : Serde(org.apache.samza.serializers.Serde) StringSerde(org.apache.samza.serializers.StringSerde) SerializableSerde(org.apache.samza.serializers.SerializableSerde) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) StreamSpec(org.apache.samza.system.StreamSpec) HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) MapConfig(org.apache.samza.config.MapConfig) SerializerConfig(org.apache.samza.config.SerializerConfig) TaskConfig(org.apache.samza.config.TaskConfig) Config(org.apache.samza.config.Config) KV(org.apache.samza.operators.KV) MockLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor.MockLocalTableDescriptor) BaseTableDescriptor(org.apache.samza.table.descriptors.BaseTableDescriptor) JobConfig(org.apache.samza.config.JobConfig) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MapConfig(org.apache.samza.config.MapConfig) Test(org.junit.Test)

Example 8 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestStreamTableJoinOperatorImpl method testHandleMessage.

@Test
public void testHandleMessage() {
    String tableId = "t1";
    StreamTableJoinOperatorSpec mockJoinOpSpec = mock(StreamTableJoinOperatorSpec.class);
    when(mockJoinOpSpec.getTableId()).thenReturn(tableId);
    when(mockJoinOpSpec.getArgs()).thenReturn(new Object[0]);
    when(mockJoinOpSpec.getJoinFn()).thenReturn(new StreamTableJoinFunction<String, KV<String, String>, KV<String, String>, String>() {

        @Override
        public String apply(KV<String, String> message, KV<String, String> record) {
            if ("1".equals(message.getKey())) {
                Assert.assertEquals("m1", message.getValue());
                Assert.assertEquals("r1", record.getValue());
                return "m1r1";
            } else if ("2".equals(message.getKey())) {
                Assert.assertEquals("m2", message.getValue());
                Assert.assertNull(record);
                return null;
            }
            throw new SamzaException("Should never reach here!");
        }

        @Override
        public String getMessageKey(KV<String, String> message) {
            return message.getKey();
        }

        @Override
        public String getRecordKey(KV<String, String> record) {
            return record.getKey();
        }
    });
    ReadWriteUpdateTable table = mock(ReadWriteUpdateTable.class);
    when(table.getAsync("1")).thenReturn(CompletableFuture.completedFuture("r1"));
    when(table.getAsync("2")).thenReturn(CompletableFuture.completedFuture(null));
    Context context = new MockContext();
    when(context.getTaskContext().getUpdatableTable(tableId)).thenReturn(table);
    MessageCollector mockMessageCollector = mock(MessageCollector.class);
    TaskCoordinator mockTaskCoordinator = mock(TaskCoordinator.class);
    StreamTableJoinOperatorImpl streamTableJoinOperator = new StreamTableJoinOperatorImpl(mockJoinOpSpec, context);
    // Table has the key
    Collection<TestMessageEnvelope> result;
    result = streamTableJoinOperator.handleMessage(KV.of("1", "m1"), mockMessageCollector, mockTaskCoordinator);
    Assert.assertEquals(1, result.size());
    Assert.assertEquals("m1r1", result.iterator().next());
    // Table doesn't have the key
    result = streamTableJoinOperator.handleMessage(KV.of("2", "m2"), mockMessageCollector, mockTaskCoordinator);
    Assert.assertEquals(0, result.size());
}
Also used : Context(org.apache.samza.context.Context) MockContext(org.apache.samza.context.MockContext) ReadWriteUpdateTable(org.apache.samza.table.ReadWriteUpdateTable) MockContext(org.apache.samza.context.MockContext) TaskCoordinator(org.apache.samza.task.TaskCoordinator) KV(org.apache.samza.operators.KV) SamzaException(org.apache.samza.SamzaException) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) MessageCollector(org.apache.samza.task.MessageCollector) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) Test(org.junit.Test)

Example 9 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestOperatorSpec method testStreamOperatorSpecWithMapWithFunctionReference.

@Test
public void testStreamOperatorSpecWithMapWithFunctionReference() {
    MapFunction<KV<String, Object>, Object> mapFn = KV::getValue;
    StreamOperatorSpec<KV<String, Object>, Object> streamOperatorSpec = OperatorSpecs.createMapOperatorSpec(mapFn, "op0");
    StreamOperatorSpec<TestMessageEnvelope, TestOutputMessageEnvelope> cloneOperatorSpec = (StreamOperatorSpec<TestMessageEnvelope, TestOutputMessageEnvelope>) OperatorSpecTestUtils.copyOpSpec(streamOperatorSpec);
    assertNotEquals(streamOperatorSpec, cloneOperatorSpec);
    assertTrue(streamOperatorSpec.isClone(cloneOperatorSpec));
    MapFunction userFn = (MapFunction) Whitebox.getInternalState(streamOperatorSpec, "mapFn");
    assertEquals(userFn, mapFn);
    assertNotEquals(streamOperatorSpec.getTransformFn(), cloneOperatorSpec.getTransformFn());
    MapFunction clonedUserFn = (MapFunction) Whitebox.getInternalState(cloneOperatorSpec, "mapFn");
    assertTrue(cloneOperatorSpec.getTransformFn() instanceof FlatMapFunction);
    assertTrue(clonedUserFn instanceof MapFunction);
    assertNotEquals(userFn, clonedUserFn);
}
Also used : TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) TestOutputMessageEnvelope(org.apache.samza.operators.data.TestOutputMessageEnvelope) KV(org.apache.samza.operators.KV) FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) MapFunction(org.apache.samza.operators.functions.MapFunction) Test(org.junit.Test)

Example 10 with KV

use of org.apache.samza.operators.KV in project samza by apache.

the class TestCouchbaseRemoteTableEndToEnd method testEndToEnd.

@Test
public void testEndToEnd() {
    Bucket inputBucket = cluster.openBucket(inputBucketName);
    inputBucket.upsert(ByteArrayDocument.create("Alice", "20".getBytes()));
    inputBucket.upsert(ByteArrayDocument.create("Bob", "30".getBytes()));
    inputBucket.upsert(ByteArrayDocument.create("Chris", "40".getBytes()));
    inputBucket.upsert(ByteArrayDocument.create("David", "50".getBytes()));
    inputBucket.close();
    List<String> users = Arrays.asList("Alice", "Bob", "Chris", "David");
    final StreamApplication app = appDesc -> {
        DelegatingSystemDescriptor inputSystemDescriptor = new DelegatingSystemDescriptor("test");
        GenericInputDescriptor<String> inputDescriptor = inputSystemDescriptor.getInputDescriptor("User", new NoOpSerde<>());
        CouchbaseTableReadFunction<String> readFunction = new CouchbaseTableReadFunction<>(inputBucketName, String.class, "couchbase://127.0.0.1").withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(inputBucketName)).withBootstrapHttpDirectPort(couchbaseMock.getHttpPort()).withSerde(new StringSerde());
        CouchbaseTableWriteFunction<JsonObject> writeFunction = new CouchbaseTableWriteFunction<>(outputBucketName, JsonObject.class, "couchbase://127.0.0.1").withBootstrapCarrierDirectPort(couchbaseMock.getCarrierPort(outputBucketName)).withBootstrapHttpDirectPort(couchbaseMock.getHttpPort());
        RemoteTableDescriptor inputTableDesc = new RemoteTableDescriptor<String, String, Void>("input-table").withReadFunction(readFunction).withRateLimiterDisabled();
        Table<KV<String, String>> inputTable = appDesc.getTable(inputTableDesc);
        RemoteTableDescriptor outputTableDesc = new RemoteTableDescriptor<String, JsonObject, Object>("output-table").withReadFunction(new NoOpTableReadFunction<>()).withWriteFunction(writeFunction).withRateLimiterDisabled();
        Table<KV<String, JsonObject>> outputTable = appDesc.getTable(outputTableDesc);
        appDesc.getInputStream(inputDescriptor).map(k -> KV.of(k, k)).join(inputTable, new JoinFunction()).sendTo(outputTable);
    };
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<TestTableData.PageView> inputDescriptor = isd.getInputDescriptor("User", new NoOpSerde<>());
    TestRunner.of(app).addInputStream(inputDescriptor, users).run(Duration.ofSeconds(10));
    Bucket outputBucket = cluster.openBucket(outputBucketName);
    Assert.assertEquals("{\"name\":\"Alice\",\"age\":\"20\"}", outputBucket.get("Alice").content().toString());
    Assert.assertEquals("{\"name\":\"Bob\",\"age\":\"30\"}", outputBucket.get("Bob").content().toString());
    Assert.assertEquals("{\"name\":\"Chris\",\"age\":\"40\"}", outputBucket.get("Chris").content().toString());
    Assert.assertEquals("{\"name\":\"David\",\"age\":\"50\"}", outputBucket.get("David").content().toString());
    outputBucket.close();
}
Also used : CouchbaseTableWriteFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableWriteFunction) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) CouchbaseEnvironment(com.couchbase.client.java.env.CouchbaseEnvironment) CouchbaseTableReadFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableReadFunction) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) JsonObject(com.couchbase.client.java.document.json.JsonObject) BucketConfiguration(com.couchbase.mock.BucketConfiguration) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) DefaultCouchbaseEnvironment(com.couchbase.client.java.env.DefaultCouchbaseEnvironment) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) CouchbaseMock(com.couchbase.mock.CouchbaseMock) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) After(org.junit.After) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Before(org.junit.Before) Table(org.apache.samza.table.Table) ByteArrayDocument(com.couchbase.client.java.document.ByteArrayDocument) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Test(org.junit.Test) TestRunner(org.apache.samza.test.framework.TestRunner) Bucket(com.couchbase.client.java.Bucket) List(java.util.List) CouchbaseCluster(com.couchbase.client.java.CouchbaseCluster) Cluster(com.couchbase.client.java.Cluster) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) JsonObject(com.couchbase.client.java.document.json.JsonObject) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Bucket(com.couchbase.client.java.Bucket) CouchbaseTableWriteFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableWriteFunction) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) NoOpSerde(org.apache.samza.serializers.NoOpSerde) CouchbaseTableReadFunction(org.apache.samza.table.remote.couchbase.CouchbaseTableReadFunction) Test(org.junit.Test)

Aggregations

KV (org.apache.samza.operators.KV)68 Test (org.junit.Test)38 StringSerde (org.apache.samza.serializers.StringSerde)33 KVSerde (org.apache.samza.serializers.KVSerde)30 HashMap (java.util.HashMap)28 NoOpSerde (org.apache.samza.serializers.NoOpSerde)26 List (java.util.List)25 Duration (java.time.Duration)24 ArrayList (java.util.ArrayList)24 StreamApplication (org.apache.samza.application.StreamApplication)22 Config (org.apache.samza.config.Config)22 Map (java.util.Map)20 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)20 Table (org.apache.samza.table.Table)19 MapConfig (org.apache.samza.config.MapConfig)18 MessageStream (org.apache.samza.operators.MessageStream)18 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)18 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)17 Collectors (java.util.stream.Collectors)16 SamzaException (org.apache.samza.SamzaException)16