Search in sources :

Example 1 with OutputReceiver

use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.

the class BeamFnMapTaskExecutorFactory method createOperationTransformForFetchAndFilterStreamingSideInputNodes.

private Function<Node, Node> createOperationTransformForFetchAndFilterStreamingSideInputNodes(MutableNetwork<Node, Edge> network, IdGenerator idGenerator, InstructionRequestHandler instructionRequestHandler, FnDataService beamFnDataService, Endpoints.ApiServiceDescriptor dataApiServiceDescriptor, DataflowExecutionContext executionContext, String stageName) {
    return new TypeSafeNodeFunction<FetchAndFilterStreamingSideInputsNode>(FetchAndFilterStreamingSideInputsNode.class) {

        @Override
        public Node typedApply(FetchAndFilterStreamingSideInputsNode input) {
            OutputReceiverNode output = (OutputReceiverNode) Iterables.getOnlyElement(network.successors(input));
            DataflowOperationContext operationContext = executionContext.createOperationContext(NameContext.create(stageName, input.getNameContext().originalName(), input.getNameContext().systemName(), input.getNameContext().userName()));
            return OperationNode.create(new FetchAndFilterStreamingSideInputsOperation<>(new OutputReceiver[] { output.getOutputReceiver() }, operationContext, instructionRequestHandler, beamFnDataService, dataApiServiceDescriptor, idGenerator, (Coder<WindowedValue<Object>>) output.getCoder(), (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy(), executionContext.getStepContext(operationContext), input.getPCollectionViewsToWindowMappingFns()));
        }
    };
}
Also used : OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) FetchAndFilterStreamingSideInputsNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) TypeSafeNodeFunction(org.apache.beam.runners.dataflow.worker.graph.Networks.TypeSafeNodeFunction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy)

Example 2 with OutputReceiver

use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.

the class BeamFnMapTaskExecutorFactory method createOperationTransformForExecutableStageNode.

private Function<Node, Node> createOperationTransformForExecutableStageNode(final Network<Node, Edge> network, final String stageName, final DataflowExecutionContext<?> executionContext, final JobBundleFactory jobBundleFactory) {
    return new TypeSafeNodeFunction<ExecutableStageNode>(ExecutableStageNode.class) {

        @Override
        public Node typedApply(ExecutableStageNode input) {
            StageBundleFactory stageBundleFactory = jobBundleFactory.forStage(input.getExecutableStage());
            Iterable<OutputReceiverNode> outputReceiverNodes = Iterables.filter(network.successors(input), OutputReceiverNode.class);
            Map<String, OutputReceiver> outputReceiverMap = new HashMap<>();
            Lists.newArrayList(outputReceiverNodes).stream().forEach(outputReceiverNode -> outputReceiverMap.put(outputReceiverNode.getPcollectionId(), outputReceiverNode.getOutputReceiver()));
            ImmutableMap.Builder<String, DataflowOperationContext> ptransformIdToOperationContextBuilder = ImmutableMap.builder();
            for (Map.Entry<String, NameContext> entry : input.getPTransformIdToPartialNameContextMap().entrySet()) {
                NameContext fullNameContext = NameContext.create(stageName, entry.getValue().originalName(), entry.getValue().systemName(), entry.getValue().userName());
                DataflowOperationContext operationContext = executionContext.createOperationContext(fullNameContext);
                ptransformIdToOperationContextBuilder.put(entry.getKey(), operationContext);
            }
            ImmutableMap<String, DataflowOperationContext> ptransformIdToOperationContexts = ptransformIdToOperationContextBuilder.build();
            ImmutableMap<String, SideInputReader> ptransformIdToSideInputReaders = buildPTransformIdToSideInputReadersMap(executionContext, input, ptransformIdToOperationContexts);
            Map<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> ptransformIdToSideInputIdToPCollectionView = buildSideInputIdToPCollectionView(input);
            return OperationNode.create(new ProcessRemoteBundleOperation(input.getExecutableStage(), executionContext.createOperationContext(NameContext.create(stageName, stageName, stageName, stageName)), stageBundleFactory, outputReceiverMap, ptransformIdToSideInputReaders, ptransformIdToSideInputIdToPCollectionView));
        }
    };
}
Also used : HashMap(java.util.HashMap) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) ProcessRemoteBundleOperation(org.apache.beam.runners.dataflow.worker.fn.control.ProcessRemoteBundleOperation) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) SideInputReader(org.apache.beam.runners.core.SideInputReader) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) PCollectionView(org.apache.beam.sdk.values.PCollectionView) TypeSafeNodeFunction(org.apache.beam.runners.dataflow.worker.graph.Networks.TypeSafeNodeFunction) ExecutableStageNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutableStageNode) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) HashMap(java.util.HashMap)

Example 3 with OutputReceiver

use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.

the class BeamFnMapTaskExecutorFactory method createOperationTransformForGrpcPortNodes.

private Function<Node, Node> createOperationTransformForGrpcPortNodes(final Network<Node, Edge> network, final FnDataService beamFnDataService, final OperationContext context) {
    return new TypeSafeNodeFunction<RemoteGrpcPortNode>(RemoteGrpcPortNode.class) {

        @Override
        public Node typedApply(RemoteGrpcPortNode input) {
            RegisterAndProcessBundleOperation registerFnOperation = (RegisterAndProcessBundleOperation) Iterables.getOnlyElement(Iterables.filter(network.adjacentNodes(input), OperationNode.class)).getOperation();
            // The coder comes from the one and only adjacent output node
            Coder<?> coder = Iterables.getOnlyElement(Iterables.filter(network.adjacentNodes(input), OutputReceiverNode.class)).getCoder();
            // We figure out whether we are outputting some where if the output node is a
            // successor.
            Iterable<OutputReceiverNode> outputReceiverNodes = Iterables.filter(network.successors(input), OutputReceiverNode.class);
            Operation operation;
            if (outputReceiverNodes.iterator().hasNext()) {
                OutputReceiver[] outputReceivers = new OutputReceiver[] { Iterables.getOnlyElement(outputReceiverNodes).getOutputReceiver() };
                operation = new RemoteGrpcPortReadOperation<>(beamFnDataService, input.getPrimitiveTransformId(), registerFnOperation::getProcessBundleInstructionId, (Coder) coder, outputReceivers, context);
            } else {
                operation = new RemoteGrpcPortWriteOperation<>(beamFnDataService, input.getPrimitiveTransformId(), registerFnOperation::getProcessBundleInstructionId, (Coder) coder, context);
            }
            return OperationNode.create(operation);
        }
    };
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) FlattenOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) WriteOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation) RemoteGrpcPortReadOperation(org.apache.beam.runners.dataflow.worker.fn.data.RemoteGrpcPortReadOperation) RegisterAndProcessBundleOperation(org.apache.beam.runners.dataflow.worker.fn.control.RegisterAndProcessBundleOperation) RemoteGrpcPortWriteOperation(org.apache.beam.runners.dataflow.worker.fn.data.RemoteGrpcPortWriteOperation) ProcessRemoteBundleOperation(org.apache.beam.runners.dataflow.worker.fn.control.ProcessRemoteBundleOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation) RegisterAndProcessBundleOperation(org.apache.beam.runners.dataflow.worker.fn.control.RegisterAndProcessBundleOperation) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) TypeSafeNodeFunction(org.apache.beam.runners.dataflow.worker.graph.Networks.TypeSafeNodeFunction)

Example 4 with OutputReceiver

use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.

the class BeamFnMapTaskExecutorFactory method createReadOperation.

OperationNode createReadOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, ReaderFactory readerFactory, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    ParallelInstruction instruction = node.getParallelInstruction();
    ReadInstruction read = instruction.getRead();
    Source cloudSource = CloudSourceUtils.flattenBaseSpecs(read.getSource());
    CloudObject sourceSpec = CloudObject.fromSpec(cloudSource.getSpec());
    Coder<?> coder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(cloudSource.getCodec()));
    NativeReader<?> reader = readerFactory.create(sourceSpec, coder, options, executionContext, operationContext);
    OutputReceiver[] receivers = getOutputReceivers(network, node);
    return OperationNode.create(ReadOperation.create(reader, receivers, operationContext));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ReadInstruction(com.google.api.services.dataflow.model.ReadInstruction) Source(com.google.api.services.dataflow.model.Source)

Example 5 with OutputReceiver

use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.

the class BeamFnMapTaskExecutorFactory method createParDoOperation.

private OperationNode createParDoOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    ParallelInstruction instruction = node.getParallelInstruction();
    ParDoInstruction parDo = instruction.getParDo();
    TupleTag<?> mainOutputTag = tupleTag(parDo.getMultiOutputInfos().get(0));
    ImmutableMap.Builder<TupleTag<?>, Integer> outputTagsToReceiverIndicesBuilder = ImmutableMap.builder();
    int successorOffset = 0;
    for (Node successor : network.successors(node)) {
        for (Edge edge : network.edgesConnecting(node, successor)) {
            outputTagsToReceiverIndicesBuilder.put(tupleTag(((MultiOutputInfoEdge) edge).getMultiOutputInfo()), successorOffset);
        }
        successorOffset += 1;
    }
    ParDoFn fn = parDoFnFactory.create(options, CloudObject.fromSpec(parDo.getUserFn()), parDo.getSideInputs(), mainOutputTag, outputTagsToReceiverIndicesBuilder.build(), executionContext, operationContext);
    OutputReceiver[] receivers = getOutputReceivers(network, node);
    return OperationNode.create(new ParDoOperation(fn, receivers, operationContext));
}
Also used : RegisterRequestNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RegisterRequestNode) FetchAndFilterStreamingSideInputsNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ExecutableStageNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutableStageNode) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) TupleTag(org.apache.beam.sdk.values.TupleTag) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)

Aggregations

OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver)22 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)11 ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn)10 Test (org.junit.Test)9 ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation)7 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)6 CounterSet (org.apache.beam.runners.dataflow.worker.counters.CounterSet)6 Receiver (org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver)6 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)6 TypeSafeNodeFunction (org.apache.beam.runners.dataflow.worker.graph.Networks.TypeSafeNodeFunction)5 OutputReceiverNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode)5 InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)4 KvCoder (org.apache.beam.sdk.coders.KvCoder)4 WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder)4 OperationNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode)3 ElementCounter (org.apache.beam.runners.dataflow.worker.util.common.worker.ElementCounter)3 Operation (org.apache.beam.runners.dataflow.worker.util.common.worker.Operation)3 ReadOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation)3 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)2 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)2