use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class OperatorImplGraph method createAndRegisterOperatorImpl.
/**
 * Produces the {@link OperatorImpl} for {@code operatorSpec} and walks every {@link OperatorSpec} registered
 * downstream of it, so that the whole reachable sub-DAG ends up instantiated and associated with
 * {@code inputStream}.
 *
 * <p>A new impl is created when no impl is registered under the spec's op id yet, or when the spec is a
 * {@link JoinOperatorSpec} (which needs two partial join impls). Otherwise the previously registered impl
 * is reused and only the input stream registration is propagated further down.
 *
 * @param prevOperatorSpec the spec visited immediately before {@code operatorSpec} in this traversal
 * @param operatorSpec the {@link OperatorSpec} whose {@link OperatorImpl} is requested
 * @param inputStream the source input stream this traversal started from
 * @param context the {@link Context} handed to operator creation
 * @return the {@link OperatorImpl} for {@code operatorSpec}, either newly created or already registered
 */
private OperatorImpl createAndRegisterOperatorImpl(OperatorSpec prevOperatorSpec, OperatorSpec operatorSpec,
    SystemStream inputStream, Context context) {
  boolean reuseExisting = operatorImpls.containsKey(operatorSpec.getOpId())
      && !(operatorSpec instanceof JoinOperatorSpec);

  if (reuseExisting) {
    // An impl for this spec has already been instantiated and registered by an earlier visit.
    OperatorImpl existingImpl = operatorImpls.get(operatorSpec.getOpId());
    existingImpl.registerInputStream(inputStream);
    // Keep descending: the downstream impls still have to learn about this input stream.
    for (OperatorSpec downstreamSpec : operatorSpec.getRegisteredOperatorSpecs()) {
      createAndRegisterOperatorImpl(operatorSpec, downstreamSpec, inputStream, context);
    }
    return existingImpl;
  }

  // First visit of this spec, or a join spec for which a second partial join impl is needed.
  OperatorImpl newImpl = createOperatorImpl(prevOperatorSpec, operatorSpec, context);
  newImpl.init(this.internalTaskContext);
  newImpl.registerInputStream(inputStream);

  if (operatorSpec.getScheduledFn() != null) {
    final Scheduler scheduler = newImpl.createOperatorScheduler();
    operatorSpec.getScheduledFn().schedule(scheduler);
  }

  // The map key is the opImplId, which can differ from the opId (e.g. PartialJoinOperatorImpl).
  // That is acceptable today: a partial join impl never has to be looked up again during the
  // traversal because a join cannot have a cycle.
  operatorImpls.put(newImpl.getOpImplId(), newImpl);

  for (OperatorSpec downstreamSpec : operatorSpec.getRegisteredOperatorSpecs()) {
    LOG.debug("Creating operator {} with opCode: {}", downstreamSpec.getOpId(), downstreamSpec.getOpCode());
    OperatorImpl downstreamImpl = createAndRegisterOperatorImpl(operatorSpec, downstreamSpec, inputStream, context);
    newImpl.registerNextOperator(downstreamImpl);
  }
  return newImpl;
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class JobGraphJsonGenerator method operatorToMap.
/**
 * Collects the properties of an operator into a map keyed by property name.
 *
 * <p>Every spec contributes {@code opCode}, {@code opId}, {@code sourceLocation} and {@code nextOperatorIds}.
 * Depending on the concrete spec type, {@code outputStreamId}, {@code tableId} and {@code ttlMs} are added.
 *
 * @param spec the {@link OperatorSpec} to describe
 * @return a map holding the operator's properties
 */
@VisibleForTesting
Map<String, Object> operatorToMap(OperatorSpec spec) {
  Map<String, Object> properties = new HashMap<>();

  // Properties common to every operator.
  properties.put("opCode", spec.getOpCode().name());
  properties.put("opId", spec.getOpId());
  properties.put("sourceLocation", spec.getSourceLocation());
  properties.put("nextOperatorIds",
      spec.getRegisteredOperatorSpecs().stream()
          .map(OperatorSpec::getOpId)
          .collect(Collectors.toSet()));

  // Operators that write to a stream expose the id of that stream.
  if (spec instanceof OutputOperatorSpec) {
    properties.put("outputStreamId", ((OutputOperatorSpec) spec).getOutputStream().getStreamId());
  } else if (spec instanceof PartitionByOperatorSpec) {
    properties.put("outputStreamId", ((PartitionByOperatorSpec) spec).getOutputStream().getStreamId());
  }

  // Operators that work against a table expose the id of that table.
  if (spec instanceof StreamTableJoinOperatorSpec) {
    properties.put("tableId", ((StreamTableJoinOperatorSpec) spec).getTableId());
  }
  if (spec instanceof SendToTableOperatorSpec) {
    properties.put("tableId", ((SendToTableOperatorSpec) spec).getTableId());
  }

  // Joins additionally expose their TTL.
  if (spec instanceof JoinOperatorSpec) {
    properties.put("ttlMs", ((JoinOperatorSpec) spec).getTtlMs());
  }
  return properties;
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class OperatorSpecGraph method doGetOperatorSpecs.
/**
 * Recursively adds every {@link OperatorSpec} reachable from {@code operatorSpec} through
 * {@link OperatorSpec#getRegisteredOperatorSpecs()} to {@code specs}. The starting {@code operatorSpec}
 * itself is not added.
 *
 * <p>A registered spec is only descended into the first time it is added to {@code specs}. This keeps
 * the traversal from re-walking a shared sub-DAG once per path that leads to it, and from recursing
 * without end if the specs ever formed a cycle.
 * NOTE(review): this relies on any spec already present in {@code specs} having had its descendants
 * collected as well; confirm that callers do not pre-seed the set with specs that were never traversed.
 *
 * @param operatorSpec the spec whose downstream specs are collected
 * @param specs the accumulator receiving the collected specs; also serves as the visited set
 */
private void doGetOperatorSpecs(OperatorSpec operatorSpec, Set<OperatorSpec> specs) {
  for (OperatorSpec registeredOperatorSpec : operatorSpec.getRegisteredOperatorSpecs()) {
    // Set#add returns false for an element that is already present, i.e. one that was already visited.
    if (specs.add(registeredOperatorSpec)) {
      doGetOperatorSpecs(registeredOperatorSpec, specs);
    }
  }
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class ExecutionPlanner method groupJoinedStreams.
/**
 * Groups streams participating in joins together.
 *
 * <p>One {@link StreamSet} is produced per join operator spec, built from the input operator specs that
 * take part in that join. For a {@link StreamTableJoinOperatorSpec} whose table is described by a
 * {@link LocalTableDescriptor}, the stream edges of the table's side inputs are added to the set as well.
 *
 * @param jobGraph the {@link JobGraph} providing the input operators, tables and stream edges
 * @return an unmodifiable list with one {@link StreamSet} per join operator spec
 */
private static List<StreamSet> groupJoinedStreams(JobGraph jobGraph) {
  // Group input operator specs (input/intermediate streams) by the joins they participate in.
  Multimap<OperatorSpec, InputOperatorSpec> joinOpSpecToInputOpSpecs =
      OperatorSpecGraphAnalyzer.getJoinToInputOperatorSpecs(
          jobGraph.getApplicationDescriptorImpl().getInputOperators().values());

  Map<String, TableDescriptor> tableDescriptors = jobGraph.getTables().stream()
      .collect(Collectors.toMap(TableDescriptor::getTableId, Function.identity()));

  // Convert every group of input operator specs into a group of corresponding stream edges.
  List<StreamSet> streamSets = new ArrayList<>();
  for (OperatorSpec joinOpSpec : joinOpSpecToInputOpSpecs.keySet()) {
    Collection<InputOperatorSpec> joinedInputOpSpecs = joinOpSpecToInputOpSpecs.get(joinOpSpec);
    StreamSet streamSet = getStreamSet(joinOpSpec.getOpId(), joinedInputOpSpecs, jobGraph);

    // For a stream-table join, also include the side-input streams associated with the joined table (if any).
    if (joinOpSpec instanceof StreamTableJoinOperatorSpec) {
      StreamTableJoinOperatorSpec streamTableJoinOperatorSpec = (StreamTableJoinOperatorSpec) joinOpSpec;
      TableDescriptor tableDescriptor = tableDescriptors.get(streamTableJoinOperatorSpec.getTableId());
      if (tableDescriptor instanceof LocalTableDescriptor) {
        LocalTableDescriptor localTableDescriptor = (LocalTableDescriptor) tableDescriptor;
        Collection<String> sideInputs = ListUtils.emptyIfNull(localTableDescriptor.getSideInputs());
        // Materialize the edges into a list: an Iterable backed by `stream::iterator` can be traversed
        // only once, and the concatenated Iterable below would fail on any second traversal.
        List<StreamEdge> sideInputStreams = sideInputs.stream()
            .map(jobGraph::getStreamEdge)
            .collect(Collectors.toList());
        Iterable<StreamEdge> streams = streamSet.getStreamEdges();
        streamSet = new StreamSet(streamSet.getSetId(), Iterables.concat(streams, sideInputStreams));
      }
    }
    streamSets.add(streamSet);
  }
  return Collections.unmodifiableList(streamSets);
}
Aggregations