Search in sources :

Example 6 with StreamTableJoinOperatorSpec

use of org.apache.samza.operators.spec.StreamTableJoinOperatorSpec in project samza by apache.

the class TestMessageStreamImpl method testStreamTableJoin.

@Test
public void testStreamTableJoin() {
    StreamApplicationDescriptorImpl mockGraph = mock(StreamApplicationDescriptorImpl.class);
    OperatorSpec leftInputOpSpec = mock(OperatorSpec.class);
    MessageStreamImpl<KV<String, TestMessageEnvelope>> source1 = new MessageStreamImpl<>(mockGraph, leftInputOpSpec);
    OperatorSpec rightInputOpSpec = mock(OperatorSpec.class);
    MessageStreamImpl<TestMessageEnvelope> source2 = new MessageStreamImpl<>(mockGraph, rightInputOpSpec);
    TableImpl table = new TableImpl("t1");
    source2.sendTo(table);
    StreamTableJoinFunction<String, KV<String, TestMessageEnvelope>, KV<String, TestMessageEnvelope>, TestOutputMessageEnvelope> mockJoinFn = mock(StreamTableJoinFunction.class);
    source1.join(table, mockJoinFn);
    ArgumentCaptor<OperatorSpec> leftRegisteredOpCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
    verify(leftInputOpSpec).registerNextOperatorSpec(leftRegisteredOpCaptor.capture());
    OperatorSpec<?, TestMessageEnvelope> leftRegisteredOpSpec = leftRegisteredOpCaptor.getValue();
    assertTrue(leftRegisteredOpSpec instanceof StreamTableJoinOperatorSpec);
    StreamTableJoinOperatorSpec joinOpSpec = (StreamTableJoinOperatorSpec) leftRegisteredOpSpec;
    assertEquals(OpCode.JOIN, joinOpSpec.getOpCode());
    assertEquals(mockJoinFn, joinOpSpec.getJoinFn());
    assertEquals(table.getTableId(), joinOpSpec.getTableId());
}
Also used : IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TestOutputMessageEnvelope(org.apache.samza.operators.data.TestOutputMessageEnvelope) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) Test(org.junit.Test)

Example 7 with StreamTableJoinOperatorSpec

use of org.apache.samza.operators.spec.StreamTableJoinOperatorSpec in project samza by apache.

the class OperatorImplGraph method createOperatorImpl.

/**
 * Creates a new {@link OperatorImpl} instance for the provided {@link OperatorSpec}.
 *
 * @param prevOperatorSpec the original {@link OperatorSpec} that produces output for {@code operatorSpec} from {@link OperatorSpecGraph}
 * @param operatorSpec  the original {@link OperatorSpec} from {@link OperatorSpecGraph}
 * @param context  the {@link Context} required to instantiate operators
 * @return  the {@link OperatorImpl} implementation instance
 */
OperatorImpl createOperatorImpl(OperatorSpec prevOperatorSpec, OperatorSpec operatorSpec, Context context) {
    Config config = context.getJobContext().getConfig();
    StreamConfig streamConfig = new StreamConfig(config);
    if (operatorSpec instanceof InputOperatorSpec) {
        return new InputOperatorImpl((InputOperatorSpec) operatorSpec);
    } else if (operatorSpec instanceof StreamOperatorSpec) {
        return new FlatmapOperatorImpl((StreamOperatorSpec) operatorSpec);
    } else if (operatorSpec instanceof SinkOperatorSpec) {
        return new SinkOperatorImpl((SinkOperatorSpec) operatorSpec);
    } else if (operatorSpec instanceof OutputOperatorSpec) {
        String streamId = ((OutputOperatorSpec) operatorSpec).getOutputStream().getStreamId();
        SystemStream systemStream = streamConfig.streamIdToSystemStream(streamId);
        return new OutputOperatorImpl((OutputOperatorSpec) operatorSpec, systemStream);
    } else if (operatorSpec instanceof PartitionByOperatorSpec) {
        String streamId = ((PartitionByOperatorSpec) operatorSpec).getOutputStream().getStreamId();
        SystemStream systemStream = streamConfig.streamIdToSystemStream(streamId);
        return new PartitionByOperatorImpl((PartitionByOperatorSpec) operatorSpec, systemStream, internalTaskContext);
    } else if (operatorSpec instanceof WindowOperatorSpec) {
        return new WindowOperatorImpl((WindowOperatorSpec) operatorSpec, clock);
    } else if (operatorSpec instanceof JoinOperatorSpec) {
        return getOrCreatePartialJoinOpImpls((JoinOperatorSpec) operatorSpec, prevOperatorSpec.equals(((JoinOperatorSpec) operatorSpec).getLeftInputOpSpec()), clock);
    } else if (operatorSpec instanceof StreamTableJoinOperatorSpec) {
        return new StreamTableJoinOperatorImpl((StreamTableJoinOperatorSpec) operatorSpec, context);
    } else if (operatorSpec instanceof SendToTableOperatorSpec) {
        return new SendToTableOperatorImpl((SendToTableOperatorSpec) operatorSpec, context);
    } else if (operatorSpec instanceof SendToTableWithUpdateOperatorSpec) {
        return new SendToTableWithUpdateOperatorImpl((SendToTableWithUpdateOperatorSpec) operatorSpec, context);
    } else if (operatorSpec instanceof BroadcastOperatorSpec) {
        String streamId = ((BroadcastOperatorSpec) operatorSpec).getOutputStream().getStreamId();
        SystemStream systemStream = streamConfig.streamIdToSystemStream(streamId);
        return new BroadcastOperatorImpl((BroadcastOperatorSpec) operatorSpec, systemStream, context);
    } else if (operatorSpec instanceof AsyncFlatMapOperatorSpec) {
        return new AsyncFlatmapOperatorImpl((AsyncFlatMapOperatorSpec) operatorSpec);
    }
    throw new IllegalArgumentException(String.format("Unsupported OperatorSpec: %s", operatorSpec.getClass().getName()));
}
Also used : StreamConfig(org.apache.samza.config.StreamConfig) Config(org.apache.samza.config.Config) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) BroadcastOperatorSpec(org.apache.samza.operators.spec.BroadcastOperatorSpec) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) SystemStream(org.apache.samza.system.SystemStream) StreamConfig(org.apache.samza.config.StreamConfig) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) SendToTableWithUpdateOperatorSpec(org.apache.samza.operators.spec.SendToTableWithUpdateOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) AsyncFlatMapOperatorSpec(org.apache.samza.operators.spec.AsyncFlatMapOperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec)

Example 8 with StreamTableJoinOperatorSpec

use of org.apache.samza.operators.spec.StreamTableJoinOperatorSpec in project samza by apache.

the class JobGraphJsonGenerator method operatorToMap.

/**
 * Format the operator properties into a map
 * @param spec a {@link OperatorSpec} instance
 * @return map of the operator properties
 */
@VisibleForTesting
Map<String, Object> operatorToMap(OperatorSpec spec) {
    Map<String, Object> map = new HashMap<>();
    map.put("opCode", spec.getOpCode().name());
    map.put("opId", spec.getOpId());
    map.put("sourceLocation", spec.getSourceLocation());
    Collection<OperatorSpec> nextOperators = spec.getRegisteredOperatorSpecs();
    map.put("nextOperatorIds", nextOperators.stream().map(OperatorSpec::getOpId).collect(Collectors.toSet()));
    if (spec instanceof OutputOperatorSpec) {
        OutputStreamImpl outputStream = ((OutputOperatorSpec) spec).getOutputStream();
        map.put("outputStreamId", outputStream.getStreamId());
    } else if (spec instanceof PartitionByOperatorSpec) {
        OutputStreamImpl outputStream = ((PartitionByOperatorSpec) spec).getOutputStream();
        map.put("outputStreamId", outputStream.getStreamId());
    }
    if (spec instanceof StreamTableJoinOperatorSpec) {
        String tableId = ((StreamTableJoinOperatorSpec) spec).getTableId();
        map.put("tableId", tableId);
    }
    if (spec instanceof SendToTableOperatorSpec) {
        String tableId = ((SendToTableOperatorSpec) spec).getTableId();
        map.put("tableId", tableId);
    }
    if (spec instanceof JoinOperatorSpec) {
        map.put("ttlMs", ((JoinOperatorSpec) spec).getTtlMs());
    }
    return map;
}
Also used : PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) OutputStreamImpl(org.apache.samza.operators.spec.OutputStreamImpl) HashMap(java.util.HashMap) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 9 with StreamTableJoinOperatorSpec

use of org.apache.samza.operators.spec.StreamTableJoinOperatorSpec in project samza by apache.

the class ExecutionPlanner method groupJoinedStreams.

/**
 * Groups streams participating in joins together.
 */
private static List<StreamSet> groupJoinedStreams(JobGraph jobGraph) {
    // Group input operator specs (input/intermediate streams) by the joins they participate in.
    Multimap<OperatorSpec, InputOperatorSpec> joinOpSpecToInputOpSpecs = OperatorSpecGraphAnalyzer.getJoinToInputOperatorSpecs(jobGraph.getApplicationDescriptorImpl().getInputOperators().values());
    Map<String, TableDescriptor> tableDescriptors = jobGraph.getTables().stream().collect(Collectors.toMap(TableDescriptor::getTableId, Function.identity()));
    // Convert every group of input operator specs into a group of corresponding stream edges.
    List<StreamSet> streamSets = new ArrayList<>();
    for (OperatorSpec joinOpSpec : joinOpSpecToInputOpSpecs.keySet()) {
        Collection<InputOperatorSpec> joinedInputOpSpecs = joinOpSpecToInputOpSpecs.get(joinOpSpec);
        StreamSet streamSet = getStreamSet(joinOpSpec.getOpId(), joinedInputOpSpecs, jobGraph);
        // streams associated with the joined table (if any).
        if (joinOpSpec instanceof StreamTableJoinOperatorSpec) {
            StreamTableJoinOperatorSpec streamTableJoinOperatorSpec = (StreamTableJoinOperatorSpec) joinOpSpec;
            TableDescriptor tableDescriptor = tableDescriptors.get(streamTableJoinOperatorSpec.getTableId());
            if (tableDescriptor instanceof LocalTableDescriptor) {
                LocalTableDescriptor localTableDescriptor = (LocalTableDescriptor) tableDescriptor;
                Collection<String> sideInputs = ListUtils.emptyIfNull(localTableDescriptor.getSideInputs());
                Iterable<StreamEdge> sideInputStreams = sideInputs.stream().map(jobGraph::getStreamEdge)::iterator;
                Iterable<StreamEdge> streams = streamSet.getStreamEdges();
                streamSet = new StreamSet(streamSet.getSetId(), Iterables.concat(streams, sideInputStreams));
            }
        }
        streamSets.add(streamSet);
    }
    return Collections.unmodifiableList(streamSets);
}
Also used : InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) ArrayList(java.util.ArrayList) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec)

Aggregations

StreamTableJoinOperatorSpec (org.apache.samza.operators.spec.StreamTableJoinOperatorSpec)9 InputOperatorSpec (org.apache.samza.operators.spec.InputOperatorSpec)5 JoinOperatorSpec (org.apache.samza.operators.spec.JoinOperatorSpec)5 OperatorSpec (org.apache.samza.operators.spec.OperatorSpec)5 SendToTableOperatorSpec (org.apache.samza.operators.spec.SendToTableOperatorSpec)5 Context (org.apache.samza.context.Context)3 OutputOperatorSpec (org.apache.samza.operators.spec.OutputOperatorSpec)3 PartitionByOperatorSpec (org.apache.samza.operators.spec.PartitionByOperatorSpec)3 ArrayList (java.util.ArrayList)2 StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)2 MockContext (org.apache.samza.context.MockContext)2 KV (org.apache.samza.operators.KV)2 TestMessageEnvelope (org.apache.samza.operators.data.TestMessageEnvelope)2 OutputStreamImpl (org.apache.samza.operators.spec.OutputStreamImpl)2 SinkOperatorSpec (org.apache.samza.operators.spec.SinkOperatorSpec)2 StreamOperatorSpec (org.apache.samza.operators.spec.StreamOperatorSpec)2 WindowOperatorSpec (org.apache.samza.operators.spec.WindowOperatorSpec)2 IntermediateMessageStreamImpl (org.apache.samza.operators.stream.IntermediateMessageStreamImpl)2 Test (org.junit.Test)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1