Search in sources :

Example 31 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class OperatorImplGraph method createAndRegisterOperatorImpl.

/**
 * Recursively walks the {@link OperatorSpec} DAG rooted at {@code operatorSpec}, building the matching
 * DAG of {@link OperatorImpl}s and registering {@code inputStream} on every operator visited.
 *
 * <p>An {@link OperatorImpl} is created when no impl is registered under the spec's op id yet, or
 * whenever the spec is a {@link JoinOperatorSpec} (a join is reached once per side and needs a partial
 * join impl for each). Otherwise the previously registered impl is reused, but its sub-DAG is still
 * traversed so that downstream operators are told about {@code inputStream} too.
 *
 * @param prevOperatorSpec  the spec through which {@code operatorSpec} was reached in this traversal
 * @param operatorSpec  the {@link OperatorSpec} whose {@link OperatorImpl} is wanted
 * @param inputStream  the source input stream from which this traversal started
 * @param context the {@link Context} used to instantiate operators
 * @return  the newly created or previously registered operator implementation for {@code operatorSpec}
 */
private OperatorImpl createAndRegisterOperatorImpl(OperatorSpec prevOperatorSpec, OperatorSpec operatorSpec, SystemStream inputStream, Context context) {
    final boolean alreadyRegistered = operatorImpls.containsKey(operatorSpec.getOpId());
    if (alreadyRegistered && !(operatorSpec instanceof JoinOperatorSpec)) {
        // Reuse path: the impl for this spec was instantiated and registered on an earlier visit.
        OperatorImpl existingImpl = operatorImpls.get(operatorSpec.getOpId());
        existingImpl.registerInputStream(inputStream);
        // Keep descending so the rest of the sub-DAG also gets this input stream registered.
        Collection<OperatorSpec> downstreamSpecs = operatorSpec.getRegisteredOperatorSpecs();
        for (OperatorSpec downstreamSpec : downstreamSpecs) {
            createAndRegisterOperatorImpl(operatorSpec, downstreamSpec, inputStream, context);
        }
        return existingImpl;
    }

    // Create path: first sighting of this spec, or a join spec that needs another partial join impl.
    OperatorImpl createdImpl = createOperatorImpl(prevOperatorSpec, operatorSpec, context);
    createdImpl.init(this.internalTaskContext);
    createdImpl.registerInputStream(inputStream);
    if (operatorSpec.getScheduledFn() != null) {
        // Hand the operator's scheduler to the user-supplied scheduled function.
        final Scheduler operatorScheduler = createdImpl.createOperatorScheduler();
        operatorSpec.getScheduledFn().schedule(operatorScheduler);
    }

    // The map is keyed by opImplId, which can differ from opId (e.g. for PartialJoinOperatorImpl).
    // That is acceptable today: a partial join impl is never looked up again during traversal
    // because a join cannot be part of a cycle.
    operatorImpls.put(createdImpl.getOpImplId(), createdImpl);

    // Build the sub-DAG below this operator and chain each child impl after the new impl.
    Collection<OperatorSpec> downstreamSpecs = operatorSpec.getRegisteredOperatorSpecs();
    for (OperatorSpec downstreamSpec : downstreamSpecs) {
        LOG.debug("Creating operator {} with opCode: {}", downstreamSpec.getOpId(), downstreamSpec.getOpCode());
        OperatorImpl downstreamImpl = createAndRegisterOperatorImpl(operatorSpec, downstreamSpec, inputStream, context);
        createdImpl.registerNextOperator(downstreamImpl);
    }
    return createdImpl;
}
Also used : StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) BroadcastOperatorSpec(org.apache.samza.operators.spec.BroadcastOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) SendToTableWithUpdateOperatorSpec(org.apache.samza.operators.spec.SendToTableWithUpdateOperatorSpec) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) AsyncFlatMapOperatorSpec(org.apache.samza.operators.spec.AsyncFlatMapOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) Scheduler(org.apache.samza.operators.Scheduler) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec)

Example 32 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class JobGraphJsonGenerator method operatorToMap.

/**
 * Collects the properties of an operator into a map.
 *
 * <p>Every operator contributes {@code opCode}, {@code opId}, {@code sourceLocation} and
 * {@code nextOperatorIds}. Depending on the concrete spec type the map additionally carries
 * {@code outputStreamId} (output and partition-by operators), {@code tableId} (stream-table join and
 * send-to-table operators) and {@code ttlMs} (join operators).
 *
 * @param spec a {@link OperatorSpec} instance
 * @return map of the operator properties
 */
@VisibleForTesting
Map<String, Object> operatorToMap(OperatorSpec spec) {
    Map<String, Object> properties = new HashMap<>();

    // Properties common to every operator.
    properties.put("opCode", spec.getOpCode().name());
    properties.put("opId", spec.getOpId());
    properties.put("sourceLocation", spec.getSourceLocation());

    // Ids of the operators registered directly after this one.
    Collection<OperatorSpec> downstreamSpecs = spec.getRegisteredOperatorSpecs();
    properties.put("nextOperatorIds", downstreamSpecs.stream().map(next -> next.getOpId()).collect(Collectors.toSet()));

    // Operators that write to a stream expose the id of that stream.
    if (spec instanceof OutputOperatorSpec) {
        properties.put("outputStreamId", ((OutputOperatorSpec) spec).getOutputStream().getStreamId());
    } else if (spec instanceof PartitionByOperatorSpec) {
        properties.put("outputStreamId", ((PartitionByOperatorSpec) spec).getOutputStream().getStreamId());
    }

    // Operators that read from or write to a table expose the id of that table.
    if (spec instanceof StreamTableJoinOperatorSpec) {
        properties.put("tableId", ((StreamTableJoinOperatorSpec) spec).getTableId());
    }
    if (spec instanceof SendToTableOperatorSpec) {
        properties.put("tableId", ((SendToTableOperatorSpec) spec).getTableId());
    }

    // Joins additionally expose their TTL.
    if (spec instanceof JoinOperatorSpec) {
        properties.put("ttlMs", ((JoinOperatorSpec) spec).getTtlMs());
    }
    return properties;
}
Also used : PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) OutputStreamImpl(org.apache.samza.operators.spec.OutputStreamImpl) HashMap(java.util.HashMap) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 33 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class OperatorSpecGraph method doGetOperatorSpecs.

/**
 * Recursively adds every {@link OperatorSpec} reachable from {@code operatorSpec} to {@code specs}.
 * The starting {@code operatorSpec} itself is not added by this method.
 *
 * @param operatorSpec the spec whose registered (downstream) specs are collected
 * @param specs the accumulator that receives the collected specs
 */
private void doGetOperatorSpecs(OperatorSpec operatorSpec, Set<OperatorSpec> specs) {
    Collection<OperatorSpec> children = operatorSpec.getRegisteredOperatorSpecs();
    children.forEach(child -> {
        // Record the child first, then descend into its own registered specs.
        specs.add(child);
        doGetOperatorSpecs(child, specs);
    });
}
Also used : OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec)

Example 34 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class ExecutionPlanner method groupJoinedStreams.

/**
 * Groups streams participating in joins together.
 *
 * <p>One {@link StreamSet} is produced per join operator spec, built from the input operator specs
 * that feed the join. For a stream-table join whose table is a {@link LocalTableDescriptor}, the
 * stream edges of the table's side inputs (if any) are added to the same set.
 *
 * @param jobGraph the job graph supplying the input operators, tables and stream edges
 * @return an unmodifiable list with one {@link StreamSet} per join operator spec
 */
private static List<StreamSet> groupJoinedStreams(JobGraph jobGraph) {
    // Group input operator specs (input/intermediate streams) by the joins they participate in.
    Multimap<OperatorSpec, InputOperatorSpec> joinOpSpecToInputOpSpecs = OperatorSpecGraphAnalyzer.getJoinToInputOperatorSpecs(jobGraph.getApplicationDescriptorImpl().getInputOperators().values());
    // Index the job's tables by table id so a stream-table join can look up its table descriptor.
    Map<String, TableDescriptor> tableDescriptors = jobGraph.getTables().stream().collect(Collectors.toMap(TableDescriptor::getTableId, Function.identity()));
    // Convert every group of input operator specs into a group of corresponding stream edges.
    List<StreamSet> streamSets = new ArrayList<>();
    for (OperatorSpec joinOpSpec : joinOpSpecToInputOpSpecs.keySet()) {
        Collection<InputOperatorSpec> joinedInputOpSpecs = joinOpSpecToInputOpSpecs.get(joinOpSpec);
        StreamSet streamSet = getStreamSet(joinOpSpec.getOpId(), joinedInputOpSpecs, jobGraph);
        // If this is a stream-table join, also include the stream edges of the side-input
        // streams associated with the joined table (if any).
        if (joinOpSpec instanceof StreamTableJoinOperatorSpec) {
            StreamTableJoinOperatorSpec streamTableJoinOperatorSpec = (StreamTableJoinOperatorSpec) joinOpSpec;
            TableDescriptor tableDescriptor = tableDescriptors.get(streamTableJoinOperatorSpec.getTableId());
            if (tableDescriptor instanceof LocalTableDescriptor) {
                LocalTableDescriptor localTableDescriptor = (LocalTableDescriptor) tableDescriptor;
                Collection<String> sideInputs = ListUtils.emptyIfNull(localTableDescriptor.getSideInputs());
                // Materialize the side-input edges into a list. An Iterable created from
                // Stream::iterator can be traversed only once, and Iterables.concat is lazy, so a
                // second traversal of the concatenated view would fail with IllegalStateException.
                List<StreamEdge> sideInputStreams = sideInputs.stream().map(jobGraph::getStreamEdge).collect(Collectors.toList());
                Iterable<StreamEdge> streams = streamSet.getStreamEdges();
                streamSet = new StreamSet(streamSet.getSetId(), Iterables.concat(streams, sideInputStreams));
            }
        }
        streamSets.add(streamSet);
    }
    return Collections.unmodifiableList(streamSets);
}
Also used : InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) ArrayList(java.util.ArrayList) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec)

Aggregations

OperatorSpec (org.apache.samza.operators.spec.OperatorSpec)34 SinkOperatorSpec (org.apache.samza.operators.spec.SinkOperatorSpec)20 JoinOperatorSpec (org.apache.samza.operators.spec.JoinOperatorSpec)18 StreamOperatorSpec (org.apache.samza.operators.spec.StreamOperatorSpec)18 StreamTableJoinOperatorSpec (org.apache.samza.operators.spec.StreamTableJoinOperatorSpec)18 OutputOperatorSpec (org.apache.samza.operators.spec.OutputOperatorSpec)17 SendToTableOperatorSpec (org.apache.samza.operators.spec.SendToTableOperatorSpec)17 WindowOperatorSpec (org.apache.samza.operators.spec.WindowOperatorSpec)16 Test (org.junit.Test)16 TestMessageEnvelope (org.apache.samza.operators.data.TestMessageEnvelope)15 PartitionByOperatorSpec (org.apache.samza.operators.spec.PartitionByOperatorSpec)15 StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)14 IntermediateMessageStreamImpl (org.apache.samza.operators.stream.IntermediateMessageStreamImpl)12 InputOperatorSpec (org.apache.samza.operators.spec.InputOperatorSpec)11 FlatMapFunction (org.apache.samza.operators.functions.FlatMapFunction)7 HashMap (java.util.HashMap)5 HashSet (java.util.HashSet)5 PartialJoinOperatorSpec (org.apache.samza.operators.spec.PartialJoinOperatorSpec)5 KVSerde (org.apache.samza.serializers.KVSerde)5 Collection (java.util.Collection)4