use of org.apache.samza.operators.spec.JoinOperatorSpec in project samza by apache.
the class OperatorSpecGraphAnalyzer method getJoinToInputOperatorSpecs.
/**
 * Groups the given {@link InputOperatorSpec}s by the join operator specs they feed into, where a join is
 * either a {@link JoinOperatorSpec} or a {@link StreamTableJoinOperatorSpec}.
 *
 * Keys of the returned Multimap are typed as {@link OperatorSpec} because {@link JoinOperatorSpec} and
 * {@link StreamTableJoinOperatorSpec} share no stricter common base type. Key objects are nonetheless
 * guaranteed to be instances of one of those two types only.
 *
 * @param inputOpSpecs the input operator specs from which the operator spec graph is traversed
 * @return a Multimap associating every join-related operator spec found with the input operator specs
 *         it was reached from
 */
public static Multimap<OperatorSpec, InputOperatorSpec> getJoinToInputOperatorSpecs(Collection<InputOperatorSpec> inputOpSpecs) {
  // Build a successor function that emulates an edge from every SendToTableOperatorSpec to all
  // StreamTableJoinOperatorSpecs referencing the same table.
  //
  // SendToTableOperatorSpecs are terminal operator specs, so without these emulated edges there would be
  // no easy way to relate the streams behind them to the streams taking part in Stream-Table Joins.
  final Function<OperatorSpec, Iterable<OperatorSpec>> successorsOf = getCustomGetNextOpSpecs(inputOpSpecs);
  final Multimap<OperatorSpec, InputOperatorSpec> inputsByJoin = HashMultimap.create();

  // Traverse the graph once per input operator spec, recording which join-related operator specs
  // each input operator spec is connected to.
  for (InputOperatorSpec source : inputOpSpecs) {
    // The visitor collects every join-related operator spec reachable from this source.
    JoinVisitor visitor = new JoinVisitor();
    traverse(source, visitor, successorsOf);

    // Register this source under each join it reaches.
    for (OperatorSpec reachedJoin : visitor.getJoins()) {
      inputsByJoin.put(reachedJoin, source);
    }
  }
  return inputsByJoin;
}
use of org.apache.samza.operators.spec.JoinOperatorSpec in project samza by apache.
the class TestMessageStreamImpl method testJoin.
/**
 * Joins two streams backed by mocked operator specs and checks that both inputs had the same
 * {@link JoinOperatorSpec} registered as their next operator, carrying the supplied join function,
 * TTL, and left/right input operator specs.
 */
@Test
public void testJoin() {
  Duration ttl = Duration.ofMinutes(1);
  StreamApplicationDescriptorImpl appDescriptor = mock(StreamApplicationDescriptorImpl.class);

  OperatorSpec leftInputOpSpec = mock(OperatorSpec.class);
  MessageStreamImpl<TestMessageEnvelope> leftStream = new MessageStreamImpl<>(appDescriptor, leftInputOpSpec);
  OperatorSpec rightInputOpSpec = mock(OperatorSpec.class);
  MessageStreamImpl<TestMessageEnvelope> rightStream = new MessageStreamImpl<>(appDescriptor, rightInputOpSpec);

  JoinFunction<String, TestMessageEnvelope, TestMessageEnvelope, TestOutputMessageEnvelope> joinFn = mock(JoinFunction.class);
  leftStream.join(rightStream, joinFn, mock(Serde.class), mock(Serde.class), mock(Serde.class), ttl, "j1");

  // Capture whatever operator spec was registered downstream of the left input.
  ArgumentCaptor<OperatorSpec> leftCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
  verify(leftInputOpSpec).registerNextOperatorSpec(leftCaptor.capture());
  OperatorSpec<?, TestMessageEnvelope> registeredOnLeft = leftCaptor.getValue();

  // Capture whatever operator spec was registered downstream of the right input.
  ArgumentCaptor<OperatorSpec> rightCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
  verify(rightInputOpSpec).registerNextOperatorSpec(rightCaptor.capture());
  OperatorSpec<?, TestMessageEnvelope> registeredOnRight = rightCaptor.getValue();

  // Both sides must have been handed the same join operator spec.
  assertEquals(registeredOnLeft, registeredOnRight);
  assertEquals(OpCode.JOIN, registeredOnLeft.getOpCode());
  assertTrue(registeredOnLeft instanceof JoinOperatorSpec);

  // Cast only after the instanceof assertion so a type mismatch surfaces as an assertion failure.
  JoinOperatorSpec joinSpec = (JoinOperatorSpec) registeredOnLeft;
  assertEquals(joinFn, joinSpec.getJoinFn());
  assertEquals(ttl.toMillis(), joinSpec.getTtlMs());
  assertEquals(leftInputOpSpec, joinSpec.getLeftInputOpSpec());
  assertEquals(rightInputOpSpec, joinSpec.getRightInputOpSpec());
}
use of org.apache.samza.operators.spec.JoinOperatorSpec in project samza by apache.
the class OperatorImplGraph method createOperatorImpl.
/**
 * Instantiates the {@link OperatorImpl} that corresponds to the concrete type of the given
 * {@link OperatorSpec}.
 *
 * @param prevOperatorSpec the original {@link OperatorSpec} that produces output for {@code operatorSpec} from {@link OperatorSpecGraph}
 * @param operatorSpec the original {@link OperatorSpec} from {@link OperatorSpecGraph}
 * @param context the {@link Context} required to instantiate operators
 * @return the {@link OperatorImpl} implementation instance
 * @throws IllegalArgumentException if {@code operatorSpec} is of none of the supported types
 */
OperatorImpl createOperatorImpl(OperatorSpec prevOperatorSpec, OperatorSpec operatorSpec, Context context) {
  // Built up front, regardless of spec type; used by the branches that resolve a stream id to a SystemStream.
  Config jobConfig = context.getJobContext().getConfig();
  StreamConfig streamConfig = new StreamConfig(jobConfig);

  if (operatorSpec instanceof InputOperatorSpec) {
    return new InputOperatorImpl((InputOperatorSpec) operatorSpec);
  }
  if (operatorSpec instanceof StreamOperatorSpec) {
    return new FlatmapOperatorImpl((StreamOperatorSpec) operatorSpec);
  }
  if (operatorSpec instanceof SinkOperatorSpec) {
    return new SinkOperatorImpl((SinkOperatorSpec) operatorSpec);
  }
  if (operatorSpec instanceof OutputOperatorSpec) {
    String outputStreamId = ((OutputOperatorSpec) operatorSpec).getOutputStream().getStreamId();
    SystemStream outputSystemStream = streamConfig.streamIdToSystemStream(outputStreamId);
    return new OutputOperatorImpl((OutputOperatorSpec) operatorSpec, outputSystemStream);
  }
  if (operatorSpec instanceof PartitionByOperatorSpec) {
    String partitionStreamId = ((PartitionByOperatorSpec) operatorSpec).getOutputStream().getStreamId();
    SystemStream partitionSystemStream = streamConfig.streamIdToSystemStream(partitionStreamId);
    return new PartitionByOperatorImpl((PartitionByOperatorSpec) operatorSpec, partitionSystemStream, internalTaskContext);
  }
  if (operatorSpec instanceof WindowOperatorSpec) {
    return new WindowOperatorImpl((WindowOperatorSpec) operatorSpec, clock);
  }
  if (operatorSpec instanceof JoinOperatorSpec) {
    JoinOperatorSpec joinSpec = (JoinOperatorSpec) operatorSpec;
    // True when this join was reached through its left input, i.e. the previous spec is the join's left input spec.
    boolean reachedFromLeftInput = prevOperatorSpec.equals(joinSpec.getLeftInputOpSpec());
    return getOrCreatePartialJoinOpImpls(joinSpec, reachedFromLeftInput, clock);
  }
  if (operatorSpec instanceof StreamTableJoinOperatorSpec) {
    return new StreamTableJoinOperatorImpl((StreamTableJoinOperatorSpec) operatorSpec, context);
  }
  if (operatorSpec instanceof SendToTableOperatorSpec) {
    return new SendToTableOperatorImpl((SendToTableOperatorSpec) operatorSpec, context);
  }
  if (operatorSpec instanceof SendToTableWithUpdateOperatorSpec) {
    return new SendToTableWithUpdateOperatorImpl((SendToTableWithUpdateOperatorSpec) operatorSpec, context);
  }
  if (operatorSpec instanceof BroadcastOperatorSpec) {
    String broadcastStreamId = ((BroadcastOperatorSpec) operatorSpec).getOutputStream().getStreamId();
    SystemStream broadcastSystemStream = streamConfig.streamIdToSystemStream(broadcastStreamId);
    return new BroadcastOperatorImpl((BroadcastOperatorSpec) operatorSpec, broadcastSystemStream, context);
  }
  if (operatorSpec instanceof AsyncFlatMapOperatorSpec) {
    return new AsyncFlatmapOperatorImpl((AsyncFlatMapOperatorSpec) operatorSpec);
  }

  // No branch matched: the spec type is not one this factory knows how to instantiate.
  throw new IllegalArgumentException(String.format("Unsupported OperatorSpec: %s", operatorSpec.getClass().getName()));
}
use of org.apache.samza.operators.spec.JoinOperatorSpec in project samza by apache.
the class OperatorImplGraph method createAndRegisterOperatorImpl.
/**
 * Walks the DAG of {@link OperatorSpec}s from the given {@link OperatorSpec}, builds the matching DAG of
 * {@link OperatorImpl}s, and returns the root {@link OperatorImpl} node of that sub-DAG.
 *
 * @param prevOperatorSpec the parent of the current {@code operatorSpec} in the traversal
 * @param operatorSpec the {@link OperatorSpec} to create the {@link OperatorImpl} for
 * @param inputStream the source input stream that we traverse the {@link OperatorSpecGraph} from
 * @param context the {@link Context} required to instantiate operators
 * @return the operator implementation for the operatorSpec
 */
private OperatorImpl createAndRegisterOperatorImpl(OperatorSpec prevOperatorSpec, OperatorSpec operatorSpec, SystemStream inputStream, Context context) {
  if (operatorImpls.containsKey(operatorSpec.getOpId()) && !(operatorSpec instanceof JoinOperatorSpec)) {
    // An impl for this (non-join) operatorSpec was instantiated and registered earlier; reuse it.
    OperatorImpl existingImpl = operatorImpls.get(operatorSpec.getOpId());
    existingImpl.registerInputStream(inputStream);

    // The rest of the DAG still has to be walked so that the input stream gets registered downstream.
    Collection<OperatorSpec> downstreamSpecs = operatorSpec.getRegisteredOperatorSpecs();
    for (OperatorSpec downstreamSpec : downstreamSpecs) {
      createAndRegisterOperatorImpl(operatorSpec, downstreamSpec, inputStream, context);
    }
    return existingImpl;
  }

  // Either this operatorSpec has not been seen before, or it is a join operator spec, for which
  // 2 partial join operator impls need to be created. Initialize and register the sub-DAG.
  OperatorImpl createdImpl = createOperatorImpl(prevOperatorSpec, operatorSpec, context);
  createdImpl.init(this.internalTaskContext);
  createdImpl.registerInputStream(inputStream);

  if (operatorSpec.getScheduledFn() != null) {
    final Scheduler opScheduler = createdImpl.createOperatorScheduler();
    operatorSpec.getScheduledFn().schedule(opScheduler);
  }

  // Note: The map is keyed by opImplId, which can differ from opId for some impls
  // (e.g. PartialJoinOperatorImpl). That is currently fine because a partial join operator impl never
  // has to be looked up again during traversal (a join cannot have a cycle).
  operatorImpls.put(createdImpl.getOpImplId(), createdImpl);

  Collection<OperatorSpec> downstreamSpecs = operatorSpec.getRegisteredOperatorSpecs();
  for (OperatorSpec downstreamSpec : downstreamSpecs) {
    LOG.debug("Creating operator {} with opCode: {}", downstreamSpec.getOpId(), downstreamSpec.getOpCode());
    OperatorImpl downstreamImpl = createAndRegisterOperatorImpl(operatorSpec, downstreamSpec, inputStream, context);
    createdImpl.registerNextOperator(downstreamImpl);
  }
  return createdImpl;
}
use of org.apache.samza.operators.spec.JoinOperatorSpec in project samza by apache.
the class JobGraphJsonGenerator method operatorToMap.
/**
 * Collects the properties of an operator into a map keyed by property name.
 *
 * Every operator contributes "opCode", "opId", "sourceLocation" and "nextOperatorIds". Depending on
 * the concrete spec type, "outputStreamId", "tableId" or "ttlMs" is added as well.
 *
 * @param spec a {@link OperatorSpec} instance
 * @return map of the operator properties
 */
@VisibleForTesting
Map<String, Object> operatorToMap(OperatorSpec spec) {
  Map<String, Object> properties = new HashMap<>();

  // Properties common to every operator spec.
  properties.put("opCode", spec.getOpCode().name());
  properties.put("opId", spec.getOpId());
  properties.put("sourceLocation", spec.getSourceLocation());

  Collection<OperatorSpec> downstreamSpecs = spec.getRegisteredOperatorSpecs();
  properties.put("nextOperatorIds",
      downstreamSpecs.stream()
          .map(OperatorSpec::getOpId)
          .collect(Collectors.toSet()));

  // Specs that write to an output stream expose that stream's id.
  if (spec instanceof OutputOperatorSpec) {
    OutputStreamImpl target = ((OutputOperatorSpec) spec).getOutputStream();
    properties.put("outputStreamId", target.getStreamId());
  } else if (spec instanceof PartitionByOperatorSpec) {
    OutputStreamImpl target = ((PartitionByOperatorSpec) spec).getOutputStream();
    properties.put("outputStreamId", target.getStreamId());
  }

  // Table-related specs expose the id of the table they refer to.
  if (spec instanceof StreamTableJoinOperatorSpec) {
    properties.put("tableId", ((StreamTableJoinOperatorSpec) spec).getTableId());
  }
  if (spec instanceof SendToTableOperatorSpec) {
    properties.put("tableId", ((SendToTableOperatorSpec) spec).getTableId());
  }

  // Stream-stream joins expose their TTL in milliseconds.
  if (spec instanceof JoinOperatorSpec) {
    properties.put("ttlMs", ((JoinOperatorSpec) spec).getTtlMs());
  }
  return properties;
}
Aggregations