Search in sources :

Example 1 with InputDescriptor

Use of org.apache.samza.system.descriptors.InputDescriptor in the Apache Samza project.

Class ScanTranslator, method translate.

// ScanMapFunction
/**
 * Translates a {@link TableScan} into a stream of {@link SamzaSqlRelMessage}s and registers that
 * stream with the translator context under the scan's id.
 *
 * <p>Nothing is registered when the source's table descriptor is a {@link RemoteTableDescriptor}
 * or a {@link CachingTableDescriptor}; the method returns early in that case.
 *
 * @param tableScan the scan node; its qualified table name identifies the source
 * @param queryLogicalId forwarded to the {@code ScanMapFunction}
 * @param logicalOpId forwarded to the {@code ScanMapFunction}
 * @param context supplies the stream application descriptor and receives the resulting stream
 * @param systemDescriptors system descriptors keyed by system name; an entry is added when the
 *     source's system has none yet
 * @param inputMsgStreams input message streams keyed by source; an entry is added when absent
 * @throws SamzaException if the system already has a descriptor whose transformer is not a
 *     {@link SamzaSqlInputTransformer}
 */
void translate(final TableScan tableScan, final String queryLogicalId, final String logicalOpId, final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors, Map<String, MessageStream<SamzaSqlInputMessage>> inputMsgStreams) {
    StreamApplicationDescriptor appDescriptor = context.getStreamAppDescriptor();
    String sourceName = SqlIOConfig.getSourceFromSourceParts(tableScan.getTable().getQualifiedName());
    Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName));

    SqlIOConfig ioConfig = systemStreamConfig.get(sourceName);
    final String systemName = ioConfig.getSystemName();
    final String streamId = ioConfig.getStreamId();
    final String source = ioConfig.getSource();

    // Sources backed by a remote or caching table descriptor get no input stream here.
    boolean backedByRemoteTable = ioConfig.getTableDescriptor()
        .filter(td -> td instanceof RemoteTableDescriptor || td instanceof CachingTableDescriptor)
        .isPresent();
    if (backedByRemoteTable) {
        return;
    }

    // Make sure the system descriptor carries the wrapper input transformer (SamzaSqlInputTransformer).
    DelegatingSystemDescriptor systemDescriptor = systemDescriptors.get(systemName);
    if (systemDescriptor != null) {
        // In SamzaSQL the user should never set up a system descriptor, so an existing one is only
        // expected for fan-out (same input stream in several sql statements) or when the same input
        // appears twice in one statement (e.g., select ... from input as i1, input as i2 ...).
        // Any other transformer type is an error.
        systemDescriptor.getTransformer().ifPresent(existing -> {
            if (!(existing instanceof SamzaSqlInputTransformer)) {
                throw new SamzaException("SamzaSQL Exception: existing transformer for " + systemName + " is not SamzaSqlInputTransformer");
            }
        });
    } else {
        systemDescriptor = new DelegatingSystemDescriptor(systemName, new SamzaSqlInputTransformer());
        systemDescriptors.put(systemName, systemDescriptor);
    }

    InputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(streamId, new NoOpSerde<>());
    if (!inputMsgStreams.containsKey(source)) {
        // First scan of this source: create the input stream once and cache the mapped stream.
        MessageStream<SamzaSqlInputMessage> rawStream = appDescriptor.getInputStream(inputDescriptor);
        inputMsgStreams.put(source, rawStream.map(new SystemMessageMapperFunction(source, queryId)));
    }

    MessageStream<SamzaSqlInputMessage> sharedStream = inputMsgStreams.get(source);
    MessageStream<SamzaSqlRelMessage> relStream = sharedStream
        .filter(new FilterSystemMessageFunction(sourceName, queryId))
        .map(new ScanMapFunction(sourceName, queryId, queryLogicalId, logicalOpId));
    context.registerMessageStream(tableScan.getId(), relStream);
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaSqlInputTransformer(org.apache.samza.sql.SamzaSqlInputTransformer) InputTransformer(org.apache.samza.system.descriptors.InputTransformer) SamzaException(org.apache.samza.SamzaException) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlInputTransformer(org.apache.samza.sql.SamzaSqlInputTransformer) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 2 with InputDescriptor

Use of org.apache.samza.system.descriptors.InputDescriptor in the Apache Beam project.

Class TranslationContext, method createDummyStreamDescriptor.

/**
 * Creates a dummy stream on a system backed by {@link InMemorySystemFactory} and returns its
 * input descriptor. Three messages are sent to the stream before returning: one element, a
 * watermark at {@code BoundedWindow.TIMESTAMP_MAX_VALUE}, and an end-of-stream message.
 *
 * <p>The dummy stream created will only be used in Beam tests.
 *
 * @param id used for the system name and for every identifier of the stream spec and system stream
 * @return the input descriptor of the dummy stream, with its offset default set to OLDEST
 */
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
    final GenericSystemDescriptor inMemorySystem = new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
    final GenericInputDescriptor<OpMessage<String>> descriptor = inMemorySystem.getInputDescriptor(id, new NoOpSerde<>());
    descriptor.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);

    // Create the single-partition stream through the system admin.
    final Config systemConfig = new MapConfig(descriptor.toConfig(), inMemorySystem.toConfig());
    final SystemFactory systemFactory = new InMemorySystemFactory();
    final StreamSpec spec = new StreamSpec(id, id, id, 1);
    systemFactory.getAdmin(id, systemConfig).createStream(spec);

    final SystemProducer producer = systemFactory.getProducer(id, systemConfig, null);
    final SystemStream target = new SystemStream(id, id);

    // Send, in order: one element, the max watermark, then end-of-stream.
    final WindowedValue<String> element = WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant());
    producer.send(id, new OutgoingMessageEnvelope(target, 0, null, OpMessage.ofElement(element)));
    producer.send(id, new OutgoingMessageEnvelope(target, 0, null, new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis())));
    producer.send(id, new OutgoingMessageEnvelope(target, 0, null, new EndOfStreamMessage(null)));

    return descriptor;
}
Also used : InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) TransformInputs(org.apache.beam.runners.core.construction.TransformInputs) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) TupleTag(org.apache.beam.sdk.values.TupleTag) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) OutputDescriptor(org.apache.samza.system.descriptors.OutputDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) Logger(org.slf4j.Logger) Set(java.util.Set) SystemFactory(org.apache.samza.system.SystemFactory) StreamSpec(org.apache.samza.system.StreamSpec) UUID(java.util.UUID) PCollection(org.apache.beam.sdk.values.PCollection) HashIdGenerator(org.apache.beam.runners.samza.util.HashIdGenerator) Consumer(java.util.function.Consumer) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) List(java.util.List) PValue(org.apache.beam.sdk.values.PValue) SystemProducer(org.apache.samza.system.SystemProducer) 
StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) Config(org.apache.samza.config.Config) Collections(java.util.Collections) OutputStream(org.apache.samza.operators.OutputStream) StreamSpec(org.apache.samza.system.StreamSpec) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory) SystemFactory(org.apache.samza.system.SystemFactory) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) SystemProducer(org.apache.samza.system.SystemProducer) SystemStream(org.apache.samza.system.SystemStream) Instant(org.joda.time.Instant) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MapConfig(org.apache.samza.config.MapConfig) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) InMemorySystemFactory(org.apache.samza.system.inmemory.InMemorySystemFactory)

Example 3 with InputDescriptor

Use of org.apache.samza.system.descriptors.InputDescriptor in the Apache Samza project.

Class TestStreamApplicationDescriptorImpl, method testGetInputStreamWithExpandingSystem.

/**
 * Checks that requesting an input stream from an expanding system descriptor registers the
 * expanded stream in place of the requested one, and that the expander runs exactly once.
 */
@Test
public void testGetInputStreamWithExpandingSystem() {
    final String originalStreamId = "test-stream-1";
    final String expandedStreamId = "expanded-stream";
    final AtomicInteger expansions = new AtomicInteger();

    // Expander that counts its invocations and returns an input stream from a different system.
    StreamExpander expander = (appDescriptor, ignoredDescriptor) -> {
        expansions.incrementAndGet();
        GenericSystemDescriptor expandedSystem = new GenericSystemDescriptor("expanded-system", "mockFactoryClass");
        InputDescriptor expandedDescriptor = expandedSystem.getInputDescriptor(expandedStreamId, new IntegerSerde());
        return appDescriptor.getInputStream(expandedDescriptor);
    };

    MockExpandingSystemDescriptor expandingSystem = new MockExpandingSystemDescriptor("mock-system", expander);
    MockInputDescriptor originalDescriptor = expandingSystem.getInputDescriptor(originalStreamId, new IntegerSerde());
    StreamApplicationDescriptorImpl streamAppDesc =
        new StreamApplicationDescriptorImpl(app -> app.getInputStream(originalDescriptor), getConfig());

    // The input operator is looked up under the expanded stream id.
    InputOperatorSpec expandedOpSpec = streamAppDesc.getInputOperators().get(expandedStreamId);
    assertEquals(OpCode.INPUT, expandedOpSpec.getOpCode());
    assertEquals(1, expansions.get());

    // The original stream id must not appear; only the expanded one does.
    assertFalse(streamAppDesc.getInputOperators().containsKey(originalStreamId));
    assertFalse(streamAppDesc.getInputDescriptors().containsKey(originalStreamId));
    assertTrue(streamAppDesc.getInputDescriptors().containsKey(expandedStreamId));
    assertEquals(expandedStreamId, expandedOpSpec.getStreamId());
}
Also used : SystemDescriptor(org.apache.samza.system.descriptors.SystemDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) AtomicReference(java.util.concurrent.atomic.AtomicReference) TableImpl(org.apache.samza.operators.TableImpl) ArrayList(java.util.ArrayList) OutputStreamImpl(org.apache.samza.operators.spec.OutputStreamImpl) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) BaseTableDescriptor(org.apache.samza.table.descriptors.BaseTableDescriptor) ImmutableList(com.google.common.collect.ImmutableList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ApplicationConfig(org.apache.samza.config.ApplicationConfig) InputTransformer(org.apache.samza.system.descriptors.InputTransformer) ProcessorLifecycleListenerFactory(org.apache.samza.runtime.ProcessorLifecycleListenerFactory) Assert.fail(org.junit.Assert.fail) MapConfig(org.apache.samza.config.MapConfig) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Mockito.doReturn(org.mockito.Mockito.doReturn) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) ApplicationContainerContextFactory(org.apache.samza.context.ApplicationContainerContextFactory) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) TransformingInputDescriptorProvider(org.apache.samza.system.descriptors.TransformingInputDescriptorProvider) SamzaException(org.apache.samza.SamzaException) 
ApplicationTaskContextFactory(org.apache.samza.context.ApplicationTaskContextFactory) ExpandingInputDescriptorProvider(org.apache.samza.system.descriptors.ExpandingInputDescriptorProvider) StreamExpander(org.apache.samza.system.descriptors.StreamExpander) Mockito.verify(org.mockito.Mockito.verify) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Optional(java.util.Optional) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) Assert.assertEquals(org.junit.Assert.assertEquals) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) Mockito.mock(org.mockito.Mockito.mock) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StreamExpander(org.apache.samza.system.descriptors.StreamExpander) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) IntegerSerde(org.apache.samza.serializers.IntegerSerde) Test(org.junit.Test)

Aggregations

InputDescriptor (org.apache.samza.system.descriptors.InputDescriptor)3 HashMap (java.util.HashMap)2 List (java.util.List)2 SamzaException (org.apache.samza.SamzaException)2 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)2 Config (org.apache.samza.config.Config)2 MapConfig (org.apache.samza.config.MapConfig)2 NoOpSerde (org.apache.samza.serializers.NoOpSerde)2 ImmutableList (com.google.common.collect.ImmutableList)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1 Optional (java.util.Optional)1 Set (java.util.Set)1 UUID (java.util.UUID)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Consumer (java.util.function.Consumer)1 TransformInputs (org.apache.beam.runners.core.construction.TransformInputs)1