Search in sources :

Example 46 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestCoordinatorStreamSystemProducer method testCoordinatorStreamSystemProducer.

/**
 * Exercises {@link CoordinatorStreamSystemProducer} against a mock system producer: checks that
 * register/start/stop are reflected by the mock's state flags, and that each message sent shows up
 * as one envelope, in send order, whose key and message deserialize back to the original message.
 */
@Test
public void testCoordinatorStreamSystemProducer() {
    MockCoordinatorStreamSystemFactory.enableMockConsumerCache();
    String source = "source";
    SystemStream systemStream = new SystemStream("system", "stream");
    MockCoordinatorSystemProducer systemProducer = new MockCoordinatorSystemProducer(source);
    MockSystemAdmin systemAdmin = new MockSystemAdmin();
    CoordinatorStreamSystemProducer producer = new CoordinatorStreamSystemProducer(systemStream, systemProducer, systemAdmin);
    SetConfig setConfig1 = new SetConfig(source, "job.name", "my-job-name");
    SetConfig setConfig2 = new SetConfig(source, "job.id", "1234");
    Delete delete = new Delete(source, "job.name", SetConfig.TYPE);

    // Lifecycle: each flag on the mock must flip only after the matching call on the producer.
    assertFalse(systemProducer.isRegistered());
    producer.register(source);
    assertTrue(systemProducer.isRegistered());
    assertFalse(systemProducer.isStarted());
    producer.start();
    assertTrue(systemProducer.isStarted());
    producer.send(setConfig1);
    producer.send(setConfig2);
    producer.send(delete);
    assertFalse(systemProducer.isStopped());
    producer.stop();
    assertTrue(systemProducer.isStopped());

    List<OutgoingMessageEnvelope> envelopes = systemProducer.getEnvelopes();
    // Check the count BEFORE indexing: if an envelope is missing, the test should fail with a
    // readable assertion message rather than an IndexOutOfBoundsException from get(i).
    assertEquals(3, envelopes.size());
    OutgoingMessageEnvelope envelope0 = envelopes.get(0);
    OutgoingMessageEnvelope envelope1 = envelopes.get(1);
    OutgoingMessageEnvelope envelope2 = envelopes.get(2);
    TypeReference<Object[]> keyRef = new TypeReference<Object[]>() {
    };
    TypeReference<Map<String, Object>> msgRef = new TypeReference<Map<String, Object>>() {
    };
    // Round-trip every envelope's key/message bytes and compare with the message that was sent.
    assertEquals(new CoordinatorStreamMessage(setConfig1), new CoordinatorStreamMessage(deserialize((byte[]) envelope0.getKey(), keyRef), deserialize((byte[]) envelope0.getMessage(), msgRef)));
    assertEquals(new CoordinatorStreamMessage(setConfig2), new CoordinatorStreamMessage(deserialize((byte[]) envelope1.getKey(), keyRef), deserialize((byte[]) envelope1.getMessage(), msgRef)));
    assertEquals(new CoordinatorStreamMessage(delete), new CoordinatorStreamMessage(deserialize((byte[]) envelope2.getKey(), keyRef), deserialize((byte[]) envelope2.getMessage(), msgRef)));
}
Also used : Delete(org.apache.samza.coordinator.stream.messages.Delete) SystemStream(org.apache.samza.system.SystemStream) CoordinatorStreamMessage(org.apache.samza.coordinator.stream.messages.CoordinatorStreamMessage) SetConfig(org.apache.samza.coordinator.stream.messages.SetConfig) TypeReference(com.fasterxml.jackson.core.type.TypeReference) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Map(java.util.Map) Test(org.junit.Test)

Example 47 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestJobModelCalculator method testWithRegexTopicRewriters.

/**
 * Calculates a job model for a config that lists one input stream directly and names two regex
 * rewriters, each of which (via the mocked {@code ConfigUtil.applyRewriter}) adds one more input
 * stream, and compares the result against a hand-built expected {@link JobModel}.
 */
@Test
public void testWithRegexTopicRewriters() {
    // the one SystemStream that appears in the config before any rewriter is applied
    SystemStream configuredStream = new SystemStream("existingSystem", "existingStream");
    addStreamMetadataCacheMetadata(
        this.streamMetadataCache,
        ImmutableMap.of(
            SYSTEM_STREAM0, buildSystemStreamMetadata(4),
            SYSTEM_STREAM1, buildSystemStreamMetadata(3),
            configuredStream, buildSystemStreamMetadata(1)));
    Map<TaskName, Integer> changelogMapping = changelogPartitionMapping(4);

    PowerMockito.mockStatic(ConfigUtil.class);
    // first rewriter contributes SYSTEM_STREAM0
    PowerMockito.when(ConfigUtil.applyRewriter(any(), eq(REGEX_REWRITER0)))
        .thenAnswer(call -> addSystemStreamInput(SYSTEM_STREAM0, call.getArgumentAt(0, Config.class)));
    // second rewriter contributes SYSTEM_STREAM1
    PowerMockito.when(ConfigUtil.applyRewriter(any(), eq(REGEX_REWRITER1)))
        .thenAnswer(call -> addSystemStreamInput(SYSTEM_STREAM1, call.getArgumentAt(0, Config.class)));

    String rewriterNames = String.format("%s,%s", REGEX_REWRITER0, REGEX_REWRITER1);
    String rewriter0ClassKey = String.format(JobConfig.CONFIG_REWRITER_CLASS, REGEX_REWRITER0);
    String rewriter1ClassKey = String.format(JobConfig.CONFIG_REWRITER_CLASS, REGEX_REWRITER1);
    Config config = config(
        ImmutableList.of(configuredStream),
        ImmutableMap.of(
            JobConfig.CONFIG_REWRITERS, rewriterNames,
            rewriter0ClassKey, RegExTopicGenerator.class.getName(),
            rewriter1ClassKey, RegExTopicGenerator.class.getName()));

    // task 0 is expected to own partition 0 of all three streams
    Set<SystemStreamPartition> task0Partitions = ImmutableSet.of(
        new SystemStreamPartition(SYSTEM_STREAM0, new Partition(0)),
        new SystemStreamPartition(SYSTEM_STREAM1, new Partition(0)),
        new SystemStreamPartition(configuredStream, new Partition(0)));
    TaskModel task0 = new TaskModel(taskName(0), task0Partitions, new Partition(0));
    ContainerModel container0 =
        new ContainerModel("0", ImmutableMap.of(taskName(0), task0, taskName(2), taskModel(2, 2, 2)));
    ContainerModel container1 =
        new ContainerModel("1", ImmutableMap.of(taskName(1), taskModel(1, 1, 1), taskName(3), taskModel(3, 3)));
    Map<String, ContainerModel> expectedContainers = ImmutableMap.of("0", container0, "1", container1);

    // expected config = original config with the input-stream list extended by both rewriters
    String expectedInputs = String.format(
        "%s,%s,%s",
        taskInputString(configuredStream),
        taskInputString(SYSTEM_STREAM0),
        taskInputString(SYSTEM_STREAM1));
    Map<String, String> expectedConfig = new HashMap<>(config);
    expectedConfig.put(TaskConfig.INPUT_STREAMS, expectedInputs);

    JobModel expectedModel = new JobModel(new MapConfig(expectedConfig), expectedContainers);
    JobModel actualModel = JobModelCalculator.INSTANCE.calculateJobModel(
        config, changelogMapping, this.streamMetadataCache, this.grouperMetadata);
    assertEquals(expectedModel, actualModel);
}
Also used : SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) MapConfig(org.apache.samza.config.MapConfig) StorageConfig(org.apache.samza.config.StorageConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) TaskConfig(org.apache.samza.config.TaskConfig) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) JobModel(org.apache.samza.job.model.JobModel) MapConfig(org.apache.samza.config.MapConfig) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 48 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class JobModelCalculator method getInputStreamPartitions.

/**
 * Computes the input system stream partitions of a samza job using the provided {@code config}
 * and {@code streamMetadataCache}.
 *
 * <p>The input streams are read from {@code config} through {@link TaskConfig}; their metadata is
 * fetched from {@code streamMetadataCache}, and one {@link SystemStreamPartition} is created for
 * every partition key found in each stream's partition metadata.
 *
 * @param config the configuration of the job.
 * @param streamMetadataCache to query the partition metadata of the input streams.
 * @return the input {@link SystemStreamPartition}s of the samza job.
 */
private static Set<SystemStreamPartition> getInputStreamPartitions(Config config, StreamMetadataCache streamMetadataCache) {
    TaskConfig taskConfig = new TaskConfig(config);
    // The cache exposes a Scala API: convert the input-stream set to Scala for the call and the
    // returned metadata map back to Java.
    Map<SystemStream, SystemStreamMetadata> metadataByStream = JavaConverters.mapAsJavaMapConverter(
        streamMetadataCache.getStreamMetadata(
            JavaConverters.asScalaSetConverter(taskConfig.getInputStreams()).asScala().toSet(), true))
        .asJava();
    Set<SystemStreamPartition> result = new HashSet<>();
    for (Map.Entry<SystemStream, SystemStreamMetadata> entry : metadataByStream.entrySet()) {
        SystemStream stream = entry.getKey();
        for (Partition partition : entry.getValue().getSystemStreamPartitionMetadata().keySet()) {
            result.add(new SystemStreamPartition(stream, partition));
        }
    }
    return result;
}
Also used : ConfigException(org.apache.samza.config.ConfigException) StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) ConfigUtil(org.apache.samza.util.ConfigUtil) SystemStreamPartitionMatcher(org.apache.samza.system.SystemStreamPartitionMatcher) SystemStreamPartitionGrouperFactory(org.apache.samza.container.grouper.stream.SystemStreamPartitionGrouperFactory) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) TaskModel(org.apache.samza.job.model.TaskModel) SSPGrouperProxy(org.apache.samza.container.grouper.stream.SSPGrouperProxy) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Function(java.util.function.Function) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) MapConfig(org.apache.samza.config.MapConfig) JobModel(org.apache.samza.job.model.JobModel) RegExTopicGenerator(org.apache.samza.config.RegExTopicGenerator) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) TaskConfig(org.apache.samza.config.TaskConfig) TaskNameGrouperFactory(org.apache.samza.container.grouper.task.TaskNameGrouperFactory) Partition(org.apache.samza.Partition) Set(java.util.Set) Collectors(java.util.stream.Collectors) SystemStreamPartitionGrouper(org.apache.samza.container.grouper.stream.SystemStreamPartitionGrouper) TreeMap(java.util.TreeMap) TaskNameGrouperProxy(org.apache.samza.container.grouper.task.TaskNameGrouperProxy) ReflectionUtil(org.apache.samza.util.ReflectionUtil) ContainerModel(org.apache.samza.job.model.ContainerModel) JavaConverters(scala.collection.JavaConverters) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) Config(org.apache.samza.config.Config) 
Comparator(java.util.Comparator) SystemStream(org.apache.samza.system.SystemStream) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) TaskConfig(org.apache.samza.config.TaskConfig) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet)

Example 49 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class StreamRegexMonitorFactory method build.

/**
 * Build a {@link StreamRegexMonitor} for input streams for the job model.
 *
 * <p>An empty {@link Optional} is returned when the monitor is disabled in the config, when no
 * config rewriters are defined, or when the rewriters yield no regex patterns to monitor.
 *
 * @param jobModel job model whose system streams are the inputs to monitor.
 * @param config job configuration, wrapped in a {@link JobConfig} for all lookups.
 * @param callback callback handed to the created {@link StreamRegexMonitor}.
 * @return the monitor, or {@link Optional#empty()} in the cases listed above.
 * @throws SamzaException if the job model has no input system streams.
 */
public Optional<StreamRegexMonitor> build(JobModel jobModel, Config config, StreamRegexMonitor.Callback callback) {
    JobConfig jobConfig = new JobConfig(config);

    // Monitoring switched off in config: nothing to build.
    if (jobConfig.getMonitorRegexDisabled()) {
        LOG.info("StreamRegexMonitor is disabled.");
        return Optional.empty();
    }

    Set<SystemStream> monitoredStreams = JobModelUtil.getSystemStreams(jobModel);
    if (monitoredStreams.isEmpty()) {
        throw new SamzaException("Input streams to a job can not be empty.");
    }

    // Regex patterns are derived from the rewriters, so without rewriters there is nothing to monitor.
    Optional<String> rewriters = jobConfig.getConfigRewriters();
    if (!rewriters.isPresent()) {
        LOG.warn("No config rewriters are defined. No StreamRegexMonitor created.");
        return Optional.empty();
    }

    // Map of each input-system to its corresponding input-monitor-regex pattern.
    Map<String, Pattern> regexesBySystem = jobConfig.getMonitorRegexPatternMap(rewriters.get());
    if (regexesBySystem.isEmpty()) {
        LOG.info("No input regexes are defined. No StreamRegexMonitor created.");
        return Optional.empty();
    }

    StreamRegexMonitor monitor = new StreamRegexMonitor(
        monitoredStreams,
        regexesBySystem,
        this.streamMetadataCache,
        this.metrics,
        jobConfig.getMonitorRegexFrequency(),
        callback);
    return Optional.of(monitor);
}
Also used : Pattern(java.util.regex.Pattern) SystemStream(org.apache.samza.system.SystemStream) SamzaException(org.apache.samza.SamzaException) JobConfig(org.apache.samza.config.JobConfig)

Example 50 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestOperatorImplGraph method testBroadcastChain.

/**
 * Attaches two operators (a filter and a map) directly to the same input stream and checks that
 * the input operator of the resulting {@link OperatorImplGraph} has exactly two registered
 * operators, one with op code FILTER and one with op code MAP.
 */
@Test
public void testBroadcastChain() {
    String streamId = "input";
    String systemName = "input-system";
    String physicalName = "input-stream";

    Map<String, String> rawConfig = new HashMap<>();
    rawConfig.put(JobConfig.JOB_NAME, "test-job");
    rawConfig.put(JobConfig.JOB_ID, "1");
    StreamTestUtils.addStreamConfigs(rawConfig, streamId, systemName, physicalName);
    Config config = new MapConfig(rawConfig);
    when(this.context.getJobContext().getConfig()).thenReturn(config);

    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(systemName, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(streamId, mock(Serde.class));
        MessageStream<Object> source = appDesc.getInputStream(inputDescriptor);
        // both operators hang off the same input stream
        source.filter(mock(FilterFunction.class));
        source.map(mock(MapFunction.class));
    }, config);

    OperatorImplGraph implGraph =
        new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
    SystemStream inputSystemStream = new SystemStream(systemName, physicalName);
    InputOperatorImpl inputOperator = implGraph.getInputOperator(inputSystemStream);

    assertEquals(2, inputOperator.registeredOperators.size());
    assertTrue(inputOperator.registeredOperators.stream()
        .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.FILTER));
    assertTrue(inputOperator.registeredOperators.stream()
        .anyMatch(registered -> ((OperatorImpl) registered).getOperatorSpec().getOpCode() == OpCode.MAP));
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) 
ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) FilterFunction(org.apache.samza.operators.functions.FilterFunction) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemStream(org.apache.samza.system.SystemStream) MapFunction(org.apache.samza.operators.functions.MapFunction) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) 
MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Aggregations

SystemStream (org.apache.samza.system.SystemStream)143 HashMap (java.util.HashMap)75 Test (org.junit.Test)74 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)72 Partition (org.apache.samza.Partition)58 Map (java.util.Map)55 TaskName (org.apache.samza.container.TaskName)52 MapConfig (org.apache.samza.config.MapConfig)49 Config (org.apache.samza.config.Config)46 SystemAdmin (org.apache.samza.system.SystemAdmin)42 SystemAdmins (org.apache.samza.system.SystemAdmins)40 TaskModel (org.apache.samza.job.model.TaskModel)39 Collections (java.util.Collections)37 Set (java.util.Set)37 TaskConfig (org.apache.samza.config.TaskConfig)37 Clock (org.apache.samza.util.Clock)36 File (java.io.File)35 ImmutableMap (com.google.common.collect.ImmutableMap)34 SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)33 TaskMode (org.apache.samza.job.model.TaskMode)32