Search in sources :

Example 16 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class TestPartitionByOperatorSpec method testPartitionBy.

/**
 * Verifies that calling {@code partitionBy} on an input stream registers a second input operator for the
 * repartitioned stream and attaches a {@link PartitionByOperatorSpec} to the original input operator.
 */
@Test
public void testPartitionBy() {
    MapFunction<Object, String> keyFn = m -> m.toString();
    MapFunction<Object, Object> valueFn = m -> m;
    KVSerde<Object, Object> partitionBySerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
        inputStream.partitionBy(keyFn, valueFn, partitionBySerde, testRepartitionedStreamName);
    }, getConfig());

    // Both the repartitioned stream id and the partitionBy operator id are asserted against this value.
    String expectedId = String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName);

    // The original input plus the repartitioned stream are both expected as input operators.
    Map<String, InputOperatorSpec> inputOpSpecs = streamAppDesc.getInputOperators();
    assertEquals(2, inputOpSpecs.size());
    assertTrue(inputOpSpecs.containsKey(expectedId));

    InputOperatorSpec inputOpSpec = inputOpSpecs.get(expectedId);
    assertEquals(expectedId, inputOpSpec.getStreamId());
    assertTrue(inputOpSpec.getKeySerde() instanceof NoOpSerde);
    assertTrue(inputOpSpec.getValueSerde() instanceof NoOpSerde);
    assertTrue(inputOpSpec.isKeyed());
    assertNull(inputOpSpec.getScheduledFn());
    assertNull(inputOpSpec.getWatermarkFn());

    // The first operator registered on the original input must be the partitionBy operator.
    InputOperatorSpec originInputSpec = inputOpSpecs.get(testInputDescriptor.getStreamId());
    Object firstRegisteredSpec = originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
    assertTrue(firstRegisteredSpec instanceof PartitionByOperatorSpec);

    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    PartitionByOperatorSpec reparOpSpec = (PartitionByOperatorSpec) firstRegisteredSpec;
    assertEquals(expectedId, reparOpSpec.getOpId());
    assertEquals(keyFn, reparOpSpec.getKeyFunction());
    assertEquals(valueFn, reparOpSpec.getValueFunction());
    assertEquals(reparOpSpec.getOpId(), reparOpSpec.getOutputStream().getStreamId());
    assertNull(reparOpSpec.getScheduledFn());
    assertNull(reparOpSpec.getWatermarkFn());
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) ScheduledFunction(org.apache.samza.operators.functions.ScheduledFunction) Assert.assertNotNull(org.junit.Assert.assertNotNull) Collection(java.util.Collection) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) Assert.assertTrue(org.junit.Assert.assertTrue) HashMap(java.util.HashMap) Scheduler(org.apache.samza.operators.Scheduler) Serde(org.apache.samza.serializers.Serde) Test(org.junit.Test) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) OperatorSpecGraph(org.apache.samza.operators.OperatorSpecGraph) MapFunction(org.apache.samza.operators.functions.MapFunction) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction) Assert.assertNull(org.junit.Assert.assertNull) Map(java.util.Map) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) MapConfig(org.apache.samza.config.MapConfig) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MessageStream(org.apache.samza.operators.MessageStream) Mockito.mock(org.mockito.Mockito.mock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MessageStream(org.apache.samza.operators.MessageStream) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Test(org.junit.Test)

Example 17 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class ApplicationDescriptorImpl method getOrCreateTableSerdes.

/**
 * Registers the key and value serdes of {@code kvSerde} for a table, or validates them against the pair
 * already registered for that table.
 *
 * @param tableId id of the table the serdes belong to
 * @param kvSerde serde supplying the key serde and value serde for the table
 * @return the key/value serde pair held in {@code tableSerdes} for {@code tableId}
 * @throws IllegalArgumentException if a pair is already registered for {@code tableId} and its key or value
 *         serde is not equal to the one supplied
 */
KV<Serde, Serde> getOrCreateTableSerdes(String tableId, KVSerde kvSerde) {
    Serde keySerde = kvSerde.getKeySerde();
    Serde valueSerde = kvSerde.getValueSerde();
    if (!tableSerdes.containsKey(tableId)) {
        tableSerdes.put(tableId, KV.of(keySerde, valueSerde));
        return tableSerdes.get(tableId);
    }
    KV<Serde, Serde> currentSerdePair = tableSerdes.get(tableId);
    if (!currentSerdePair.getKey().equals(keySerde) || !currentSerdePair.getValue().equals(valueSerde)) {
        throw new IllegalArgumentException(
            String.format("Serde for table %s is already defined. Cannot change it to different serdes.", tableId));
    }
    // Return the pair just validated. Looking the table id up in streamSerdes here would return a
    // stream's serdes, or null when no stream shares the id.
    return currentSerdePair;
}
Also used : Serde(org.apache.samza.serializers.Serde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) KVSerde(org.apache.samza.serializers.KVSerde)

Example 18 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class StreamApplicationDescriptorImpl method getInputStream.

/**
 * Returns the {@link MessageStream} for the given input descriptor.
 *
 * If the descriptor's system descriptor supplies a {@link StreamExpander}, the call is delegated to it.
 * Otherwise the descriptor is recorded, its serdes are resolved through {@code getOrCreateStreamSerdes},
 * and a new {@link InputOperatorSpec} is stored in {@code inputOperators} under the stream id.
 *
 * @param inputDescriptor descriptor of the input stream
 * @param <M> type of messages in the returned stream
 * @return the message stream backed by the input operator registered for the descriptor's stream id
 */
@Override
public <M> MessageStream<M> getInputStream(InputDescriptor<M, ?> inputDescriptor) {
    SystemDescriptor sysDescriptor = inputDescriptor.getSystemDescriptor();
    Optional<StreamExpander> maybeExpander = sysDescriptor.getExpander();
    if (maybeExpander.isPresent()) {
        // An expander takes over stream creation entirely.
        return maybeExpander.get().apply(this, inputDescriptor);
    }

    // TODO: SAMZA-1841: need to add to the broadcast streams if inputDescriptor is for a broadcast stream
    addInputDescriptor(inputDescriptor);

    String id = inputDescriptor.getStreamId();
    Serde descriptorSerde = inputDescriptor.getSerde();
    KV<Serde, Serde> serdePair = getOrCreateStreamSerdes(id, descriptorSerde);
    // The stream counts as keyed only when the descriptor's serde is a KVSerde.
    boolean keyed = descriptorSerde instanceof KVSerde;
    InputTransformer inputTransformer = inputDescriptor.getTransformer().orElse(null);

    Serde keySerde = serdePair.getKey();
    Serde valueSerde = serdePair.getValue();
    String opId = this.getNextOpId(OpCode.INPUT, null);
    InputOperatorSpec spec =
        OperatorSpecs.createInputOperatorSpec(id, keySerde, valueSerde, inputTransformer, keyed, opId);

    inputOperators.put(id, spec);
    return new MessageStreamImpl(this, inputOperators.get(id));
}
Also used : Serde(org.apache.samza.serializers.Serde) KVSerde(org.apache.samza.serializers.KVSerde) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) MessageStreamImpl(org.apache.samza.operators.MessageStreamImpl) SystemDescriptor(org.apache.samza.system.descriptors.SystemDescriptor) KVSerde(org.apache.samza.serializers.KVSerde) InputTransformer(org.apache.samza.system.descriptors.InputTransformer) StreamExpander(org.apache.samza.system.descriptors.StreamExpander)

Example 19 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class StreamApplicationDescriptorImpl method getOutputStream.

/**
 * Returns the {@link OutputStream} for the given output descriptor.
 *
 * The descriptor is recorded, its serdes are resolved through {@code getOrCreateStreamSerdes}, and a new
 * {@code OutputStreamImpl} is stored in {@code outputStreams} under the stream id.
 *
 * @param outputDescriptor descriptor of the output stream
 * @param <M> type of messages in the returned stream
 * @return the output stream registered for the descriptor's stream id
 */
@Override
public <M> OutputStream<M> getOutputStream(OutputDescriptor<M, ?> outputDescriptor) {
    addOutputDescriptor(outputDescriptor);

    String id = outputDescriptor.getStreamId();
    Serde descriptorSerde = outputDescriptor.getSerde();
    KV<Serde, Serde> serdePair = getOrCreateStreamSerdes(id, descriptorSerde);
    // The stream counts as keyed only when the descriptor's serde is a KVSerde.
    boolean keyed = descriptorSerde instanceof KVSerde;

    Serde keySerde = serdePair.getKey();
    Serde valueSerde = serdePair.getValue();
    outputStreams.put(id, new OutputStreamImpl<>(id, keySerde, valueSerde, keyed));
    return outputStreams.get(id);
}
Also used : Serde(org.apache.samza.serializers.Serde) KVSerde(org.apache.samza.serializers.KVSerde) KVSerde(org.apache.samza.serializers.KVSerde)

Example 20 with KVSerde

use of org.apache.samza.serializers.KVSerde in project samza by apache.

the class StreamApplicationDescriptorImpl method getIntermediateStream.

/**
 * Internal helper for {@link MessageStreamImpl} that adds an intermediate {@link MessageStream} to the graph.
 * The stream is registered both as an input operator and as an output stream under the same stream id.
 *
 * @param streamId id of the intermediate stream to create; must not already be registered as an input
 *                 operator or an output stream
 * @param serde the {@link Serde} for messages in the intermediate stream; rejected by a precondition check
 *              when null
 * @param isBroadcast whether the stream id should also be recorded as an intermediate broadcast stream
 * @param <M> the type of messages in the intermediate {@link MessageStream}
 * @return the newly created {@link IntermediateMessageStreamImpl}
 */
@VisibleForTesting
public <M> IntermediateMessageStreamImpl<M> getIntermediateStream(String streamId, Serde<M> serde, boolean isBroadcast) {
    Preconditions.checkNotNull(serde, "serde must not be null for intermediate stream: " + streamId);
    boolean alreadyRegistered = inputOperators.containsKey(streamId) || outputStreams.containsKey(streamId);
    Preconditions.checkState(!alreadyRegistered, "getIntermediateStream must not be called multiple times with the same streamId: " + streamId);

    if (isBroadcast) {
        intermediateBroadcastStreamIds.add(streamId);
    }

    // The stream counts as keyed only when the supplied serde is a KVSerde.
    boolean keyed = serde instanceof KVSerde;
    KV<Serde, Serde> serdePair = getOrCreateStreamSerdes(streamId, serde);
    // Use the default system descriptor's transformer if there is one, otherwise none.
    InputTransformer inputTransformer = (InputTransformer) getDefaultSystemDescriptor().flatMap(SystemDescriptor::getTransformer).orElse(null);

    String opId = this.getNextOpId(OpCode.INPUT, null);
    InputOperatorSpec inputSpec =
        OperatorSpecs.createInputOperatorSpec(streamId, serdePair.getKey(), serdePair.getValue(), inputTransformer, keyed, opId);
    inputOperators.put(streamId, inputSpec);
    outputStreams.put(streamId, new OutputStreamImpl(streamId, serdePair.getKey(), serdePair.getValue(), keyed));

    return new IntermediateMessageStreamImpl<>(this, inputOperators.get(streamId), outputStreams.get(streamId));
}
Also used : Serde(org.apache.samza.serializers.Serde) KVSerde(org.apache.samza.serializers.KVSerde) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) KVSerde(org.apache.samza.serializers.KVSerde) SystemDescriptor(org.apache.samza.system.descriptors.SystemDescriptor) OutputStreamImpl(org.apache.samza.operators.spec.OutputStreamImpl) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) InputTransformer(org.apache.samza.system.descriptors.InputTransformer) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

KVSerde (org.apache.samza.serializers.KVSerde)29 KV (org.apache.samza.operators.KV)16 NoOpSerde (org.apache.samza.serializers.NoOpSerde)16 StringSerde (org.apache.samza.serializers.StringSerde)15 Serde (org.apache.samza.serializers.Serde)14 Test (org.junit.Test)11 StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)10 MessageStream (org.apache.samza.operators.MessageStream)10 TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor)10 Config (org.apache.samza.config.Config)9 GenericSystemDescriptor (org.apache.samza.system.descriptors.GenericSystemDescriptor)9 HashMap (java.util.HashMap)8 TestLocalTableDescriptor (org.apache.samza.table.descriptors.TestLocalTableDescriptor)8 Duration (java.time.Duration)7 MapConfig (org.apache.samza.config.MapConfig)7 OutputStream (org.apache.samza.operators.OutputStream)7 Windows (org.apache.samza.operators.windows.Windows)7 Table (org.apache.samza.table.Table)7 Map (java.util.Map)6 JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2)6