Search in sources:

Example 61 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestRunner method initializeInMemoryInputStream.

/**
 * Sets up an in memory input stream through {@link InMemorySystemFactory} and writes the supplied messages
 * into its partitions. After the messages of a partition are written, an {@link EndOfStreamMessage} is sent
 * to that same partition.
 *
 * @param descriptor describes a stream to initialize with the in memory system
 * @param partitionData key of the map represents partitionId and value represents messages in the partition
 */
private <StreamMessageType> void initializeInMemoryInputStream(InMemoryInputDescriptor<?> descriptor, Map<Integer, Iterable<StreamMessageType>> partitionData) {
    String systemName = descriptor.getSystemName();
    String streamName = (String) descriptor.getPhysicalName().orElse(descriptor.getStreamId());

    // Legacy applications only specify task.class, so the input stream has to be listed in the task config.
    if (this.app instanceof LegacyTaskApplication) {
        String qualifiedStreamName = systemName + "." + streamName;
        String inputStreams = configs.containsKey(TaskConfig.INPUT_STREAMS)
            ? configs.get(TaskConfig.INPUT_STREAMS).concat("," + qualifiedStreamName)
            : qualifiedStreamName;
        configs.put(TaskConfig.INPUT_STREAMS, inputStreams);
    }

    // The scope is applied to the system descriptor before any config is generated from it.
    InMemorySystemDescriptor inMemorySystemDescriptor = (InMemorySystemDescriptor) descriptor.getSystemDescriptor();
    inMemorySystemDescriptor.withInMemoryScope(this.inMemoryScope);
    addConfig(descriptor.toConfig());
    addConfig(inMemorySystemDescriptor.toConfig());
    addSerdeConfigs(descriptor);

    // Create the stream with one partition per entry in partitionData.
    StreamSpec streamSpec = new StreamSpec(descriptor.getStreamId(), streamName, systemName, partitionData.size());
    SystemFactory systemFactory = new InMemorySystemFactory();
    Config systemConfig = new MapConfig(descriptor.toConfig(), inMemorySystemDescriptor.toConfig());
    systemFactory.getAdmin(systemName, systemConfig).createStream(streamSpec);

    InMemorySystemProducer producer = (InMemorySystemProducer) systemFactory.getProducer(systemName, systemConfig, null);
    SystemStream systemStream = new SystemStream(systemName, streamName);
    for (Map.Entry<Integer, Iterable<StreamMessageType>> partitionEntry : partitionData.entrySet()) {
        Integer partitionId = partitionEntry.getKey();
        for (StreamMessageType element : partitionEntry.getValue()) {
            // KV elements are split into key and value; any other element is sent as a value without a key.
            Object key = null;
            Object value = element;
            if (element instanceof KV) {
                KV<?, ?> keyValue = (KV<?, ?>) element;
                key = keyValue.getKey();
                value = keyValue.getValue();
            }
            if (value instanceof IncomingMessageEnvelope) {
                // Already an incoming envelope: hand it to the producer without wrapping it.
                producer.send((IncomingMessageEnvelope) value);
                continue;
            }
            producer.send(systemName, new OutgoingMessageEnvelope(systemStream, Integer.valueOf(partitionId), key, value));
        }
        // Terminate the partition with an end-of-stream marker.
        producer.send(systemName, new OutgoingMessageEnvelope(systemStream, Integer.valueOf(partitionId), null, new EndOfStreamMessage(null)));
    }
}
Also used : StreamSpec(org.apache.samza.system.StreamSpec) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) SystemFactory(org.apache.samza.system.SystemFactory) MapConfig(org.apache.samza.config.MapConfig) InMemorySystemConfig(org.apache.samza.config.InMemorySystemConfig) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) StreamConfig(org.apache.samza.config.StreamConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) TaskConfig(org.apache.samza.config.TaskConfig) SystemStream(org.apache.samza.system.SystemStream) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) KV(org.apache.samza.operators.KV) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) MapConfig(org.apache.samza.config.MapConfig) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) InMemorySystemProducer(org.apache.samza.system.inmemory.InMemorySystemProducer)

Example 62 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestSamzaSqlRelMessageSerde method testNestedRecordConversion.

/**
 * Round-trips a nested SamzaSqlRelMessage through SamzaSqlRelMessageSerde, converts the result back to a
 * Samza message and compares every field of the Profile schema against the record it was created from.
 */
@Test
public void testNestedRecordConversion() {
    SystemStream systemStream = new SystemStream("test", "nestedRecord");
    String schemaConfigKey = String.format(ConfigBasedAvroRelSchemaProviderFactory.CFG_SOURCE_SCHEMA, systemStream.getSystem(), systemStream.getStream());
    Map<String, String> schemaConfig = new HashMap<>();
    schemaConfig.put(schemaConfigKey, Profile.SCHEMA$.toString());

    ConfigBasedAvroRelSchemaProviderFactory providerFactory = new ConfigBasedAvroRelSchemaProviderFactory();
    AvroRelSchemaProvider schemaProvider = (AvroRelSchemaProvider) providerFactory.create(systemStream, new MapConfig(schemaConfig));
    AvroRelConverter converter = new AvroRelConverter(systemStream, schemaProvider, new MapConfig());
    Pair<SamzaSqlRelMessage, GenericData.Record> messageAndRecord = createNestedSamzaSqlRelMessage(converter);

    // Serialize and deserialize the rel message, then convert it back into a Samza message.
    SamzaSqlRelMessageSerde relMessageSerde = (SamzaSqlRelMessageSerde) new SamzaSqlRelMessageSerdeFactory().getSerde(null, null);
    byte[] serialized = relMessageSerde.toBytes(messageAndRecord.getKey());
    SamzaSqlRelMessage deserialized = relMessageSerde.fromBytes(serialized);
    KV<Object, Object> convertedMessage = converter.convertToSamzaMessage(deserialized);
    GenericRecord actualRecord = (GenericRecord) convertedMessage.getValue();

    GenericData.Record expectedRecord = messageAndRecord.getValue();
    for (Schema.Field field : Profile.SCHEMA$.getFields()) {
        String fieldName = field.name();
        // equals() on GenericRecord does the nested record equality check as well.
        Assert.assertEquals(expectedRecord.get(fieldName), actualRecord.get(fieldName));
    }
}
Also used : HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) Schema(org.apache.avro.Schema) SamzaSqlRelMessageSerde(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde) AvroRelConverter(org.apache.samza.sql.avro.AvroRelConverter) AvroRelSchemaProvider(org.apache.samza.sql.avro.AvroRelSchemaProvider) ConfigBasedAvroRelSchemaProviderFactory(org.apache.samza.sql.avro.ConfigBasedAvroRelSchemaProviderFactory) GenericRecord(org.apache.avro.generic.GenericRecord) AddressRecord(org.apache.samza.sql.avro.schemas.AddressRecord) StreetNumRecord(org.apache.samza.sql.avro.schemas.StreetNumRecord) MapConfig(org.apache.samza.config.MapConfig) GenericRecord(org.apache.avro.generic.GenericRecord) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) Test(org.junit.Test)

Example 63 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestSamzaSqlRelRecordSerde method testNestedRecordConversion.

/**
 * Round-trips the SamzaSqlRelRecord of a nested rel message through SamzaSqlRelRecordSerde, converts the
 * result to an Avro object and compares every field of the Profile schema against the original record.
 */
@Test
public void testNestedRecordConversion() {
    SystemStream systemStream = new SystemStream("test", "nestedRecord");
    String schemaConfigKey = String.format(ConfigBasedAvroRelSchemaProviderFactory.CFG_SOURCE_SCHEMA, systemStream.getSystem(), systemStream.getStream());
    Map<String, String> schemaConfig = new HashMap<>();
    schemaConfig.put(schemaConfigKey, Profile.SCHEMA$.toString());

    ConfigBasedAvroRelSchemaProviderFactory providerFactory = new ConfigBasedAvroRelSchemaProviderFactory();
    AvroRelSchemaProvider schemaProvider = (AvroRelSchemaProvider) providerFactory.create(systemStream, new MapConfig(schemaConfig));
    AvroRelConverter converter = new AvroRelConverter(systemStream, schemaProvider, new MapConfig());
    Pair<SamzaSqlRelMessage, GenericData.Record> messageAndRecord = TestSamzaSqlRelMessageSerde.createNestedSamzaSqlRelMessage(converter);

    // Serialize and deserialize only the rel record carried by the message.
    SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde relRecordSerde = (SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde) new SamzaSqlRelRecordSerdeFactory().getSerde(null, null);
    byte[] serialized = relRecordSerde.toBytes(messageAndRecord.getKey().getSamzaSqlRelRecord());
    SamzaSqlRelRecord deserialized = relRecordSerde.fromBytes(serialized);
    GenericData.Record actualRecord = (GenericData.Record) converter.convertToAvroObject(deserialized, Profile.SCHEMA$);

    GenericData.Record expectedRecord = messageAndRecord.getValue();
    for (Schema.Field field : Profile.SCHEMA$.getFields()) {
        String fieldName = field.name();
        // equals() on GenericRecord does the nested record equality check as well.
        Assert.assertEquals(expectedRecord.get(fieldName), actualRecord.get(fieldName));
    }
}
Also used : HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) Schema(org.apache.avro.Schema) GenericData(org.apache.avro.generic.GenericData) AvroRelConverter(org.apache.samza.sql.avro.AvroRelConverter) SamzaSqlRelRecord(org.apache.samza.sql.SamzaSqlRelRecord) AvroRelSchemaProvider(org.apache.samza.sql.avro.AvroRelSchemaProvider) SamzaSqlRelRecordSerde(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde) ConfigBasedAvroRelSchemaProviderFactory(org.apache.samza.sql.avro.ConfigBasedAvroRelSchemaProviderFactory) SamzaSqlRelRecordSerde(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde) SamzaSqlRelRecord(org.apache.samza.sql.SamzaSqlRelRecord) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) Test(org.junit.Test)

Example 64 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestSamzaSqlRemoteTableJoinFunction method testWithInnerJoinWithTableOnRight.

/**
 * Applies an INNER SamzaSqlRemoteTableJoinFunction with the stream input on the left and the table input on
 * the right, and expects the output to hold the stream fields followed by the table fields.
 */
@Test
public void testWithInnerJoinWithTableOnRight() {
    // Schema provider and converter backed by the SimpleRecord schema.
    SystemStream systemStream = new SystemStream("test", "nestedRecord");
    String schemaConfigKey = String.format(ConfigBasedAvroRelSchemaProviderFactory.CFG_SOURCE_SCHEMA, systemStream.getSystem(), systemStream.getStream());
    Map<String, String> schemaConfig = new HashMap<>();
    schemaConfig.put(schemaConfigKey, SimpleRecord.SCHEMA$.toString());
    ConfigBasedAvroRelSchemaProviderFactory providerFactory = new ConfigBasedAvroRelSchemaProviderFactory();
    AvroRelSchemaProvider avroSchemaProvider = (AvroRelSchemaProvider) providerFactory.create(systemStream, new MapConfig(schemaConfig));
    AvroRelConverter converter = new AvroRelConverter(systemStream, avroSchemaProvider, new MapConfig());
    SamzaRelTableKeyConverter tableKeyConverter = new SampleRelTableKeyConverter();
    String tableSourceName = "testDb.testTable.$table";

    // Table-side record and the rel messages for both join inputs.
    GenericData.Record tableRow = new GenericData.Record(SimpleRecord.SCHEMA$);
    tableRow.put("id", 1);
    tableRow.put("name", "name1");
    SamzaSqlRelMessage streamMessage = new SamzaSqlRelMessage(streamFieldNames, streamFieldValues, new SamzaSqlRelMsgMetadata(0L, 0L));
    SamzaSqlRelMessage tableMessage = converter.convertToRelMessage(new KV<>(tableRow.get("id"), tableRow));
    KV<Object, GenericRecord> tableEntry = KV.of(tableRow.get("id"), tableRow);

    // Table input: positioned on the right, keyed on field 0.
    JoinInputNode tableNode = mock(JoinInputNode.class);
    when(tableNode.getKeyIds()).thenReturn(Arrays.asList(0));
    when(tableNode.isPosOnRight()).thenReturn(true);
    when(tableNode.getFieldNames()).thenReturn(tableMessage.getSamzaSqlRelRecord().getFieldNames());
    when(tableNode.getSourceName()).thenReturn(tableSourceName);

    // Stream input: positioned on the left, keyed on field 1.
    JoinInputNode streamNode = mock(JoinInputNode.class);
    when(streamNode.getKeyIds()).thenReturn(Arrays.asList(1));
    when(streamNode.isPosOnRight()).thenReturn(false);
    when(streamNode.getFieldNames()).thenReturn(streamFieldNames);

    SamzaSqlRemoteTableJoinFunction joinFunction = new SamzaSqlRemoteTableJoinFunction(converter, tableKeyConverter, streamNode, tableNode, JoinRelType.INNER, 0);
    SamzaSqlRelMessage joined = joinFunction.apply(streamMessage, tableEntry);

    // The joined record must have as many values as names.
    Assert.assertEquals(joined.getSamzaSqlRelRecord().getFieldValues().size(), joined.getSamzaSqlRelRecord().getFieldNames().size());

    // Expected output: stream fields first, table fields appended after them.
    List<String> expectedNames = new ArrayList<>(streamFieldNames);
    expectedNames.addAll(tableMessage.getSamzaSqlRelRecord().getFieldNames());
    List<Object> expectedValues = new ArrayList<>(streamFieldValues);
    expectedValues.addAll(tableMessage.getSamzaSqlRelRecord().getFieldValues());
    Assert.assertEquals(expectedNames, joined.getSamzaSqlRelRecord().getFieldNames());
    Assert.assertEquals(expectedValues, joined.getSamzaSqlRelRecord().getFieldValues());
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) AvroRelConverter(org.apache.samza.sql.avro.AvroRelConverter) SampleRelTableKeyConverter(org.apache.samza.sql.util.SampleRelTableKeyConverter) GenericRecord(org.apache.avro.generic.GenericRecord) SimpleRecord(org.apache.samza.sql.avro.schemas.SimpleRecord) MapConfig(org.apache.samza.config.MapConfig) GenericRecord(org.apache.avro.generic.GenericRecord) SamzaRelTableKeyConverter(org.apache.samza.sql.interfaces.SamzaRelTableKeyConverter) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) SystemStream(org.apache.samza.system.SystemStream) KV(org.apache.samza.operators.KV) GenericData(org.apache.avro.generic.GenericData) JoinRelType(org.apache.calcite.rel.core.JoinRelType) AvroRelSchemaProvider(org.apache.samza.sql.avro.AvroRelSchemaProvider) ConfigBasedAvroRelSchemaProviderFactory(org.apache.samza.sql.avro.ConfigBasedAvroRelSchemaProviderFactory) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) Test(org.junit.Test)

Example 65 with SystemStream

use of org.apache.samza.system.SystemStream in project beam by apache.

the class TranslationContext method createDummyStreamDescriptor.

/**
 * Builds an input descriptor for a dummy stream; the dummy stream created will only be used in Beam tests.
 *
 * <p>The given id is used as system name, stream id and physical stream name. A single-partition stream is
 * created through {@link InMemorySystemFactory} and three messages are written to partition key 0: one
 * "dummy" element, a watermark at {@code BoundedWindow.TIMESTAMP_MAX_VALUE} and an end-of-stream message.
 *
 * @param id identifier used for the system and the stream
 * @return the input descriptor of the dummy stream
 */
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
    final GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
    final GenericInputDescriptor<OpMessage<String>> inputDescriptor = systemDescriptor.getInputDescriptor(id, new NoOpSerde<>());
    inputDescriptor.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);

    final Config systemConfig = new MapConfig(inputDescriptor.toConfig(), systemDescriptor.toConfig());
    final SystemFactory systemFactory = new InMemorySystemFactory();
    final StreamSpec streamSpec = new StreamSpec(id, id, id, 1);
    systemFactory.getAdmin(id, systemConfig).createStream(streamSpec);

    final SystemProducer producer = systemFactory.getProducer(id, systemConfig, null);
    final SystemStream systemStream = new SystemStream(id, id);

    // Element first, then the watermark, then end-of-stream.
    final WindowedValue<String> dummyElement = WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant());
    producer.send(id, new OutgoingMessageEnvelope(systemStream, 0, null, OpMessage.ofElement(dummyElement)));
    producer.send(id, new OutgoingMessageEnvelope(systemStream, 0, null, new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis())));
    producer.send(id, new OutgoingMessageEnvelope(systemStream, 0, null, new EndOfStreamMessage(null)));
    return inputDescriptor;
}
Also used : InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) TransformInputs(org.apache.beam.runners.core.construction.TransformInputs) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) TupleTag(org.apache.beam.sdk.values.TupleTag) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) OutputDescriptor(org.apache.samza.system.descriptors.OutputDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) Logger(org.slf4j.Logger) Set(java.util.Set) SystemFactory(org.apache.samza.system.SystemFactory) StreamSpec(org.apache.samza.system.StreamSpec) UUID(java.util.UUID) PCollection(org.apache.beam.sdk.values.PCollection) HashIdGenerator(org.apache.beam.runners.samza.util.HashIdGenerator) Consumer(java.util.function.Consumer) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) List(java.util.List) PValue(org.apache.beam.sdk.values.PValue) SystemProducer(org.apache.samza.system.SystemProducer) 
StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) Config(org.apache.samza.config.Config) Collections(java.util.Collections) OutputStream(org.apache.samza.operators.OutputStream) StreamSpec(org.apache.samza.system.StreamSpec) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) SystemFactory(org.apache.samza.system.SystemFactory) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) SystemProducer(org.apache.samza.system.SystemProducer) SystemStream(org.apache.samza.system.SystemStream) Instant(org.joda.time.Instant) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MapConfig(org.apache.samza.config.MapConfig) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory)

Aggregations

SystemStream (org.apache.samza.system.SystemStream) 143, HashMap (java.util.HashMap) 75, Test (org.junit.Test) 74, SystemStreamPartition (org.apache.samza.system.SystemStreamPartition) 72, Partition (org.apache.samza.Partition) 58, Map (java.util.Map) 55, TaskName (org.apache.samza.container.TaskName) 52, MapConfig (org.apache.samza.config.MapConfig) 49, Config (org.apache.samza.config.Config) 46, SystemAdmin (org.apache.samza.system.SystemAdmin) 42, SystemAdmins (org.apache.samza.system.SystemAdmins) 40, TaskModel (org.apache.samza.job.model.TaskModel) 39, Collections (java.util.Collections) 37, Set (java.util.Set) 37, TaskConfig (org.apache.samza.config.TaskConfig) 37, Clock (org.apache.samza.util.Clock) 36, File (java.io.File) 35, ImmutableMap (com.google.common.collect.ImmutableMap) 34, SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) 33, TaskMode (org.apache.samza.job.model.TaskMode) 32