Search in sources :

Example 6 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class TestExecutionPlanner method testCreateJobGraphForTaskApplication.

@Test
public void testCreateJobGraphForTaskApplication() {
    TaskApplicationDescriptorImpl taskAppDesc = mock(TaskApplicationDescriptorImpl.class);
    // add interemediate streams
    String intermediateStream1 = "intermediate-stream1";
    String intermediateBroadcast = "intermediate-broadcast1";
    // intermediate stream1, not broadcast
    GenericInputDescriptor<KV<Object, Object>> intermediateInput1 = system1Descriptor.getInputDescriptor(intermediateStream1, new KVSerde<>(new NoOpSerde(), new NoOpSerde()));
    GenericOutputDescriptor<KV<Object, Object>> intermediateOutput1 = system1Descriptor.getOutputDescriptor(intermediateStream1, new KVSerde<>(new NoOpSerde(), new NoOpSerde()));
    // intermediate stream2, broadcast
    GenericInputDescriptor<KV<Object, Object>> intermediateBroacastInput1 = system1Descriptor.getInputDescriptor(intermediateBroadcast, new KVSerde<>(new NoOpSerde<>(), new NoOpSerde<>()));
    GenericOutputDescriptor<KV<Object, Object>> intermediateBroacastOutput1 = system1Descriptor.getOutputDescriptor(intermediateBroadcast, new KVSerde<>(new NoOpSerde<>(), new NoOpSerde<>()));
    inputDescriptors.put(intermediateStream1, intermediateInput1);
    outputDescriptors.put(intermediateStream1, intermediateOutput1);
    inputDescriptors.put(intermediateBroadcast, intermediateBroacastInput1);
    outputDescriptors.put(intermediateBroadcast, intermediateBroacastOutput1);
    Set<String> broadcastStreams = new HashSet<>();
    broadcastStreams.add(intermediateBroadcast);
    when(taskAppDesc.getInputDescriptors()).thenReturn(inputDescriptors);
    when(taskAppDesc.getInputStreamIds()).thenReturn(inputDescriptors.keySet());
    when(taskAppDesc.getOutputDescriptors()).thenReturn(outputDescriptors);
    when(taskAppDesc.getOutputStreamIds()).thenReturn(outputDescriptors.keySet());
    when(taskAppDesc.getTableDescriptors()).thenReturn(Collections.emptySet());
    when(taskAppDesc.getSystemDescriptors()).thenReturn(systemDescriptors);
    when(taskAppDesc.getIntermediateBroadcastStreamIds()).thenReturn(broadcastStreams);
    doReturn(MockTaskApplication.class).when(taskAppDesc).getAppClass();
    Map<String, String> systemStreamConfigs = new HashMap<>();
    inputDescriptors.forEach((key, value) -> systemStreamConfigs.putAll(value.toConfig()));
    outputDescriptors.forEach((key, value) -> systemStreamConfigs.putAll(value.toConfig()));
    systemDescriptors.forEach(sd -> systemStreamConfigs.putAll(sd.toConfig()));
    ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
    JobGraph jobGraph = planner.createJobGraph(taskAppDesc);
    assertEquals(1, jobGraph.getJobNodes().size());
    assertTrue(jobGraph.getInputStreams().stream().map(edge -> edge.getName()).filter(streamId -> inputDescriptors.containsKey(streamId)).collect(Collectors.toList()).isEmpty());
    Set<String> intermediateStreams = new HashSet<>(inputDescriptors.keySet());
    jobGraph.getInputStreams().forEach(edge -> {
        if (intermediateStreams.contains(edge.getStreamSpec().getId())) {
            intermediateStreams.remove(edge.getStreamSpec().getId());
        }
    });
    assertEquals(new HashSet<>(Arrays.asList(intermediateStream1, intermediateBroadcast)), intermediateStreams);
}
Also used : Arrays(java.util.Arrays) TaskApplicationDescriptorImpl(org.apache.samza.application.descriptors.TaskApplicationDescriptorImpl) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) Duration(java.time.Duration) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Mockito.doReturn(org.mockito.Mockito.doReturn) OutputDescriptor(org.apache.samza.system.descriptors.OutputDescriptor) Table(org.apache.samza.table.Table) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) Mockito.mock(org.mockito.Mockito.mock) SystemDescriptor(org.apache.samza.system.descriptors.SystemDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) StreamConfig(org.apache.samza.config.StreamConfig) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) ApplicationDescriptor(org.apache.samza.application.descriptors.ApplicationDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) Windows(org.apache.samza.operators.windows.Windows) TaskConfig(org.apache.samza.config.TaskConfig) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor) SamzaException(org.apache.samza.SamzaException) SystemAdmin(org.apache.samza.system.SystemAdmin) Assert(org.junit.Assert) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) TaskApplicationDescriptorImpl(org.apache.samza.application.descriptors.TaskApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 7 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class TestExecutionPlanner method createStreamGraphWithInvalidStreamTableJoinWithSideInputs.

private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
    /**
     * Example stream-table join that is invalid due to disagreement in partition count between the
     * stream behind table t and another joined stream. Table t is configured with input2 (16) as
     * side-input stream.
     *
     *                   join-table t -> output1 (8)
     *                         |
     *    input1 (64) —————————
     */
    return new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
        OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
        TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", new KVSerde(new StringSerde(), new StringSerde())).withSideInputs(Arrays.asList("input2")).withSideInputsProcessor(mock(SideInputsProcessor.class));
        Table table = appDesc.getTable(tableDescriptor);
        messageStream1.join(table, mock(StreamTableJoinFunction.class)).sendTo(output1);
    }, config);
}
Also used : KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor)

Example 8 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class TestJoinOperator method createStreamOperatorTask.

private StreamOperatorTask createStreamOperatorTask(Clock clock, StreamApplicationDescriptorImpl graphSpec) throws Exception {
    Map<String, String> mapConfig = new HashMap<>();
    mapConfig.put("job.name", "jobName");
    mapConfig.put("job.id", "jobId");
    StreamTestUtils.addStreamConfigs(mapConfig, "inStream", "insystem", "instream");
    StreamTestUtils.addStreamConfigs(mapConfig, "inStream2", "insystem", "instream2");
    Context context = new MockContext(new MapConfig(mapConfig));
    TaskModel taskModel = mock(TaskModel.class);
    when(taskModel.getSystemStreamPartitions()).thenReturn(ImmutableSet.of(new SystemStreamPartition("insystem", "instream", new Partition(0)), new SystemStreamPartition("insystem", "instream2", new Partition(0))));
    when(context.getTaskContext().getTaskModel()).thenReturn(taskModel);
    when(context.getTaskContext().getTaskMetricsRegistry()).thenReturn(new MetricsRegistryMap());
    when(context.getContainerContext().getContainerMetricsRegistry()).thenReturn(new MetricsRegistryMap());
    // need to return different stores for left and right side
    IntegerSerde integerSerde = new IntegerSerde();
    TimestampedValueSerde timestampedValueSerde = new TimestampedValueSerde(new KVSerde(integerSerde, integerSerde));
    when(context.getTaskContext().getStore(eq("jobName-jobId-join-j1-L"))).thenReturn(new TestInMemoryStore(integerSerde, timestampedValueSerde));
    when(context.getTaskContext().getStore(eq("jobName-jobId-join-j1-R"))).thenReturn(new TestInMemoryStore(integerSerde, timestampedValueSerde));
    StreamOperatorTask sot = new StreamOperatorTask(graphSpec.getOperatorSpecGraph(), clock);
    sot.init(context);
    return sot;
}
Also used : MockContext(org.apache.samza.context.MockContext) Context(org.apache.samza.context.Context) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) MockContext(org.apache.samza.context.MockContext) KVSerde(org.apache.samza.serializers.KVSerde) HashMap(java.util.HashMap) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) IntegerSerde(org.apache.samza.serializers.IntegerSerde) TestInMemoryStore(org.apache.samza.operators.impl.store.TestInMemoryStore) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) MapConfig(org.apache.samza.config.MapConfig) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 9 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class TestMessageStreamImpl method testPartitionBy.

@Test
public void testPartitionBy() throws IOException {
    StreamApplicationDescriptorImpl mockGraph = mock(StreamApplicationDescriptorImpl.class);
    OperatorSpec mockOpSpec = mock(OperatorSpec.class);
    String mockOpName = "mockName";
    when(mockGraph.getNextOpId(anyObject(), anyObject())).thenReturn(mockOpName);
    OutputStreamImpl mockOutputStreamImpl = mock(OutputStreamImpl.class);
    KVSerde mockKVSerde = mock(KVSerde.class);
    IntermediateMessageStreamImpl mockIntermediateStream = mock(IntermediateMessageStreamImpl.class);
    when(mockGraph.getIntermediateStream(eq(mockOpName), eq(mockKVSerde), eq(false))).thenReturn(mockIntermediateStream);
    when(mockIntermediateStream.getOutputStream()).thenReturn(mockOutputStreamImpl);
    when(mockIntermediateStream.isKeyed()).thenReturn(true);
    MessageStreamImpl<TestMessageEnvelope> inputStream = new MessageStreamImpl<>(mockGraph, mockOpSpec);
    MapFunction mockKeyFunction = mock(MapFunction.class);
    MapFunction mockValueFunction = mock(MapFunction.class);
    inputStream.partitionBy(mockKeyFunction, mockValueFunction, mockKVSerde, "p1");
    ArgumentCaptor<OperatorSpec> registeredOpCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
    verify(mockOpSpec).registerNextOperatorSpec(registeredOpCaptor.capture());
    OperatorSpec<?, TestMessageEnvelope> registeredOpSpec = registeredOpCaptor.getValue();
    assertTrue(registeredOpSpec instanceof PartitionByOperatorSpec);
    assertEquals(OpCode.PARTITION_BY, registeredOpSpec.getOpCode());
    assertEquals(mockOutputStreamImpl, ((PartitionByOperatorSpec) registeredOpSpec).getOutputStream());
    assertEquals(mockKeyFunction, ((PartitionByOperatorSpec) registeredOpSpec).getKeyFunction());
    assertEquals(mockValueFunction, ((PartitionByOperatorSpec) registeredOpSpec).getValueFunction());
}
Also used : IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) KVSerde(org.apache.samza.serializers.KVSerde) OutputStreamImpl(org.apache.samza.operators.spec.OutputStreamImpl) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) MapFunction(org.apache.samza.operators.functions.MapFunction) FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) Test(org.junit.Test)

Example 10 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class QueryTranslator method sendToOutputStream.

private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
    SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
    MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId());
    MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
    Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();
    if (!tableDescriptor.isPresent()) {
        KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
        String systemName = sinkConfig.getSystemName();
        DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
        GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
        OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd));
        outputStream.sendTo(stm);
        // Process system events only if the output is a stream.
        if (sqlConfig.isProcessSystemEvents()) {
            for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
                MessageStream<KV<Object, Object>> systemEventStream = inputStream.filter(message -> message.getMetadata().isSystemMessage()).map(SamzaSqlInputMessage::getKeyAndMessageKV);
                systemEventStream.sendTo(stm);
            }
        }
    } else {
        Table outputTable = appDesc.getTable(tableDescriptor.get());
        if (outputTable == null) {
            String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource();
            throw new SamzaException(msg);
        }
        outputStream.sendTo(outputTable);
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RelRoot(org.apache.calcite.rel.RelRoot) TaskContext(org.apache.samza.context.TaskContext) MapFunction(org.apache.samza.operators.functions.MapFunction) Counter(org.apache.samza.metrics.Counter) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) QueryPlanner(org.apache.samza.sql.planner.QueryPlanner) ApplicationContainerContext(org.apache.samza.context.ApplicationContainerContext) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Map(java.util.Map) TableModify(org.apache.calcite.rel.core.TableModify) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaHistogram(org.apache.samza.metrics.SamzaHistogram) ExternalContext(org.apache.samza.context.ExternalContext) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) RelNode(org.apache.calcite.rel.RelNode) SamzaException(org.apache.samza.SamzaException) ApplicationTaskContextFactory(org.apache.samza.context.ApplicationTaskContextFactory) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) Context(org.apache.samza.context.Context) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Validate(org.apache.commons.lang3.Validate) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) Optional(java.util.Optional) SamzaSqlApplicationContext(org.apache.samza.sql.runner.SamzaSqlApplicationContext) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) Table(org.apache.samza.table.Table) OutputStream(org.apache.samza.operators.OutputStream) KV(org.apache.samza.operators.KV) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaException(org.apache.samza.SamzaException) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Aggregations

KVSerde (org.apache.samza.serializers.KVSerde)29 KV (org.apache.samza.operators.KV)16 NoOpSerde (org.apache.samza.serializers.NoOpSerde)16 StringSerde (org.apache.samza.serializers.StringSerde)15 Serde (org.apache.samza.serializers.Serde)14 Test (org.junit.Test)11 StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)10 MessageStream (org.apache.samza.operators.MessageStream)10 TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor)10 Config (org.apache.samza.config.Config)9 GenericSystemDescriptor (org.apache.samza.system.descriptors.GenericSystemDescriptor)9 HashMap (java.util.HashMap)8 TestLocalTableDescriptor (org.apache.samza.table.descriptors.TestLocalTableDescriptor)8 Duration (java.time.Duration)7 MapConfig (org.apache.samza.config.MapConfig)7 OutputStream (org.apache.samza.operators.OutputStream)7 Windows (org.apache.samza.operators.windows.Windows)7 Table (org.apache.samza.table.Table)7 Map (java.util.Map)6 JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2)6