
Example 1 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in project beam by apache.

The class BeamFnMapTaskExecutorFactory, method create().

/**
 * Creates a new {@link DataflowMapTaskExecutor} from the given {@link MapTask} definition using
 * the provided {@link ReaderFactory}.
 */
@Override
public DataflowMapTaskExecutor create(
    InstructionRequestHandler instructionRequestHandler,
    GrpcFnServer<GrpcDataService> grpcDataFnServer,
    Endpoints.ApiServiceDescriptor dataApiServiceDescriptor,
    GrpcFnServer<GrpcStateService> grpcStateFnServer,
    MutableNetwork<Node, Edge> network,
    PipelineOptions options,
    String stageName,
    ReaderFactory readerFactory,
    SinkFactory sinkFactory,
    DataflowExecutionContext<?> executionContext,
    CounterSet counterSet,
    IdGenerator idGenerator) {
    // TODO: remove this once we trust the code paths
    checkArgument(
        DataflowRunner.hasExperiment(options.as(DataflowPipelineDebugOptions.class), "beam_fn_api"),
        "%s should only be used when beam_fn_api is enabled",
        getClass().getSimpleName());
    // Swap out all the InstructionOutput nodes with OutputReceiver nodes
    Networks.replaceDirectedNetworkNodes(network, createOutputReceiversTransform(stageName, counterSet));
    if (DataflowRunner.hasExperiment(options.as(DataflowPipelineDebugOptions.class), "use_executable_stage_bundle_execution")) {
        LOG.debug("Using SingleEnvironmentInstanceJobBundleFactory");
        JobBundleFactory jobBundleFactory =
            SingleEnvironmentInstanceJobBundleFactory.create(
                StaticRemoteEnvironmentFactory.forService(instructionRequestHandler),
                grpcDataFnServer,
                grpcStateFnServer,
                idGenerator);
        // If use_executable_stage_bundle_execution is enabled, use an ExecutableStage instead.
        Networks.replaceDirectedNetworkNodes(
            network,
            createOperationTransformForExecutableStageNode(
                network, stageName, executionContext, jobBundleFactory));
    } else {
        // Swap out all the RegisterFnRequest nodes with Operation nodes
        Networks.replaceDirectedNetworkNodes(
            network,
            createOperationTransformForRegisterFnNodes(
                idGenerator,
                instructionRequestHandler,
                grpcStateFnServer.getService(),
                stageName,
                executionContext));
        // Swap out all the RemoteGrpcPort nodes with Operation nodes; note that the
        // RegisterFnRequest nodes are expected to have already been replaced.
        Networks.replaceDirectedNetworkNodes(
            network,
            createOperationTransformForGrpcPortNodes(
                network,
                grpcDataFnServer.getService(),
                // TODO: Set NameContext properly for these operations.
                executionContext.createOperationContext(
                    NameContext.create(stageName, stageName, stageName, stageName))));
    }
    // Swap out all the FetchAndFilterStreamingSideInput nodes with operation nodes
    Networks.replaceDirectedNetworkNodes(
        network,
        createOperationTransformForFetchAndFilterStreamingSideInputNodes(
            network,
            idGenerator,
            instructionRequestHandler,
            grpcDataFnServer.getService(),
            dataApiServiceDescriptor,
            executionContext,
            stageName));
    // Swap out all the ParallelInstruction nodes with Operation nodes
    Networks.replaceDirectedNetworkNodes(
        network,
        createOperationTransformForParallelInstructionNodes(
            stageName, network, options, readerFactory, sinkFactory, executionContext));
    // Collect all the operations within the network and attach all the operations as receivers
    // to preceding output receivers.
    List<Operation> topoSortedOperations = new ArrayList<>();
    for (OperationNode node : Iterables.filter(Networks.topologicalOrder(network), OperationNode.class)) {
        topoSortedOperations.add(node.getOperation());
        for (Node predecessor : Iterables.filter(network.predecessors(node), OutputReceiverNode.class)) {
            ((OutputReceiverNode) predecessor).getOutputReceiver().addOutput((Receiver) node.getOperation());
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.info("Map task network: {}", Networks.toDot(network));
    }
    return BeamFnMapTaskExecutor.withSharedCounterSet(topoSortedOperations, counterSet, executionContext.getExecutionStateTracker());
}
Also used: JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) SingleEnvironmentInstanceJobBundleFactory(org.apache.beam.runners.fnexecution.control.SingleEnvironmentInstanceJobBundleFactory) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) RegisterRequestNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RegisterRequestNode) FetchAndFilterStreamingSideInputsNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ExecutableStageNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutableStageNode) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) ArrayList(java.util.ArrayList) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) FlattenOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) WriteOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation) RemoteGrpcPortReadOperation(org.apache.beam.runners.dataflow.worker.fn.data.RemoteGrpcPortReadOperation) RegisterAndProcessBundleOperation(org.apache.beam.runners.dataflow.worker.fn.control.RegisterAndProcessBundleOperation) RemoteGrpcPortWriteOperation(org.apache.beam.runners.dataflow.worker.fn.data.RemoteGrpcPortWriteOperation) ProcessRemoteBundleOperation(org.apache.beam.runners.dataflow.worker.fn.control.ProcessRemoteBundleOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation)
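
Every graph rewrite in the method above follows the same pattern: a create*Transform helper returns a transform from Node to Node, and Networks.replaceDirectedNetworkNodes applies it to each vertex of the MutableNetwork, swapping matching nodes for their operational counterparts and passing everything else through unchanged. The snippet below is a minimal sketch of that shape only, assuming a Function<Node, Node>-style transform and a hypothetical toOutputReceiverNode helper; it is not the factory's actual implementation.

// Illustrative sketch: toOutputReceiverNode is a hypothetical helper standing in for the
// wiring (coders, counters, etc.) that createOutputReceiversTransform(stageName, counterSet) does.
Function<Node, Node> swapInstructionOutputs =
    node ->
        node instanceof InstructionOutputNode
            ? toOutputReceiverNode((InstructionOutputNode) node) // replace matching nodes
            : node; // all other node types pass through untouched
Networks.replaceDirectedNetworkNodes(network, swapInstructionOutputs);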

Example 2 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in project beam by apache.

The class BeamFnMapTaskExecutorFactory, method createParDoOperation().

private OperationNode createParDoOperation(
    Network<Node, Edge> network,
    ParallelInstructionNode node,
    PipelineOptions options,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
    ParallelInstruction instruction = node.getParallelInstruction();
    ParDoInstruction parDo = instruction.getParDo();
    TupleTag<?> mainOutputTag = tupleTag(parDo.getMultiOutputInfos().get(0));
    ImmutableMap.Builder<TupleTag<?>, Integer> outputTagsToReceiverIndicesBuilder = ImmutableMap.builder();
    int successorOffset = 0;
    for (Node successor : network.successors(node)) {
        for (Edge edge : network.edgesConnecting(node, successor)) {
            outputTagsToReceiverIndicesBuilder.put(tupleTag(((MultiOutputInfoEdge) edge).getMultiOutputInfo()), successorOffset);
        }
        successorOffset += 1;
    }
    ParDoFn fn =
        parDoFnFactory.create(
            options,
            CloudObject.fromSpec(parDo.getUserFn()),
            parDo.getSideInputs(),
            mainOutputTag,
            outputTagsToReceiverIndicesBuilder.build(),
            executionContext,
            operationContext);
    OutputReceiver[] receivers = getOutputReceivers(network, node);
    return OperationNode.create(new ParDoOperation(fn, receivers, operationContext));
}
Also used: RegisterRequestNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RegisterRequestNode) FetchAndFilterStreamingSideInputsNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ExecutableStageNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutableStageNode) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) TupleTag(org.apache.beam.sdk.values.TupleTag) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)
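
To make the successor loop in createParDoOperation concrete: each successor of the ParDo node is assigned one receiver index, and every MultiOutputInfoEdge leading to that successor maps its output tag to that index. For a hypothetical ParDo with a main output and one additional output, the builder would yield a map along these lines (the tag ids are invented for illustration; only TupleTag and the vendored ImmutableMap come from the example above).

// Hypothetical result of the loop for a ParDo node with two successors.
ImmutableMap<TupleTag<?>, Integer> outputTagsToReceiverIndices =
    ImmutableMap.of(
        new TupleTag<>("mainOutput"), 0,   // edge to the first successor
        new TupleTag<>("extraOutput"), 1); // edge to the second successor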

Example 3 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in project beam by apache.

The class IntrinsicMapTaskExecutorFactoryTest, method testCreateFlattenOperation().

@Test
public void testCreateFlattenOperation() throws Exception {
    int producerIndex1 = 1;
    int producerOutputNum1 = 2;
    int producerIndex2 = 0;
    int producerOutputNum2 = 1;
    ParallelInstructionNode instructionNode =
        ParallelInstructionNode.create(
            createFlattenInstruction(
                producerIndex1, producerOutputNum1, producerIndex2, producerOutputNum2, "Flatten"),
            ExecutionLocation.UNKNOWN);
    when(network.successors(instructionNode))
        .thenReturn(
            ImmutableSet.<Node>of(
                IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
                    .apply(
                        InstructionOutputNode.create(
                            instructionNode.getParallelInstruction().getOutputs().get(0),
                            PCOLLECTION_ID))));
    when(network.outDegree(instructionNode)).thenReturn(1);
    Node operationNode =
        mapTaskExecutorFactory
            .createOperationTransformForParallelInstructionNodes(
                STAGE,
                network,
                options,
                readerRegistry,
                sinkRegistry,
                BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
            .apply(instructionNode);
    assertThat(operationNode, instanceOf(OperationNode.class));
    assertThat(((OperationNode) operationNode).getOperation(), instanceOf(FlattenOperation.class));
    FlattenOperation flattenOperation = (FlattenOperation) ((OperationNode) operationNode).getOperation();
    assertEquals(1, flattenOperation.receivers.length);
    assertEquals(0, flattenOperation.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, flattenOperation.initializationState);
}
Also used: OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) FlattenOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) Test(org.junit.Test)

Example 4 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in project beam by apache.

The class IntrinsicMapTaskExecutorFactoryTest, method testCreateWriteOperation().

@SuppressWarnings("unchecked")
@Test
public void testCreateWriteOperation() throws Exception {
    int producerIndex = 1;
    int producerOutputNum = 2;
    ParallelInstructionNode instructionNode =
        ParallelInstructionNode.create(
            createWriteInstruction(producerIndex, producerOutputNum, "WriteOperation"),
            ExecutionLocation.UNKNOWN);
    Node operationNode =
        mapTaskExecutorFactory
            .createOperationTransformForParallelInstructionNodes(
                STAGE,
                network,
                options,
                readerRegistry,
                sinkRegistry,
                BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
            .apply(instructionNode);
    assertThat(operationNode, instanceOf(OperationNode.class));
    assertThat(((OperationNode) operationNode).getOperation(), instanceOf(WriteOperation.class));
    WriteOperation writeOperation = (WriteOperation) ((OperationNode) operationNode).getOperation();
    assertEquals(0, writeOperation.receivers.length);
    assertEquals(Operation.InitializationState.UNSTARTED, writeOperation.initializationState);
    assertThat(writeOperation.sink, instanceOf(SizeReportingSinkWrapper.class));
    assertThat(((SizeReportingSinkWrapper<?>) writeOperation.sink).getUnderlyingSink(), instanceOf(TestSink.class));
    counterSet.extractUpdates(false, updateExtractor);
    verify(updateExtractor).longSum(eq(named("WriteOperation-ByteCount")), anyBoolean(), anyLong());
    verifyNoMoreInteractions(updateExtractor);
}
Also used: OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) WriteOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) Test(org.junit.Test)

Example 5 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in project beam by apache.

The class IntrinsicMapTaskExecutorFactory, method create().

/**
 * Creates a new {@link DataflowMapTaskExecutor} from the given {@link MapTask} definition using
 * the provided {@link ReaderFactory}.
 */
@Override
public DataflowMapTaskExecutor create(
    InstructionRequestHandler instructionRequestHandler,
    GrpcFnServer<GrpcDataService> grpcDataFnServer,
    Endpoints.ApiServiceDescriptor dataApiServiceDescriptor,
    GrpcFnServer<GrpcStateService> grpcStateFnServer,
    MutableNetwork<Node, Edge> network,
    PipelineOptions options,
    String stageName,
    ReaderFactory readerFactory,
    SinkFactory sinkFactory,
    DataflowExecutionContext<?> executionContext,
    CounterSet counterSet,
    IdGenerator idGenerator) {
    // TODO: remove this once we trust the code paths
    checkArgument(
        !DataflowRunner.hasExperiment(options.as(DataflowPipelineDebugOptions.class), "beam_fn_api"),
        "experiment beam_fn_api turned on but non-Fn API MapTaskExecutorFactory invoked");
    // Swap out all the InstructionOutput nodes with OutputReceiver nodes
    Networks.replaceDirectedNetworkNodes(network, createOutputReceiversTransform(stageName, counterSet));
    // Swap out all the ParallelInstruction nodes with Operation nodes
    Networks.replaceDirectedNetworkNodes(
        network,
        createOperationTransformForParallelInstructionNodes(
            stageName, network, options, readerFactory, sinkFactory, executionContext));
    // Collect all the operations within the network and attach all the operations as receivers
    // to preceding output receivers.
    List<Operation> topoSortedOperations = new ArrayList<>();
    for (OperationNode node : Iterables.filter(Networks.topologicalOrder(network), OperationNode.class)) {
        topoSortedOperations.add(node.getOperation());
        for (Node predecessor : Iterables.filter(network.predecessors(node), OutputReceiverNode.class)) {
            ((OutputReceiverNode) predecessor).getOutputReceiver().addOutput((Receiver) node.getOperation());
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.info("Map task network: {}", Networks.toDot(network));
    }
    return IntrinsicMapTaskExecutor.withSharedCounterSet(topoSortedOperations, counterSet, executionContext.getExecutionStateTracker());
}
Also used: OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) ArrayList(java.util.ArrayList) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) FlattenOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) WriteOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation)
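
Both factories end the same way: after the node replacements, every OperationNode that has an OutputReceiverNode as a predecessor is attached as an additional output of that receiver, so elements produced by one operation are pushed straight into the next. The fragment below sketches that hand-off for a single pair of nodes, using only the addOutput(Receiver) call visible in the loop above; the Read-to-ParDo pairing and the wrapper method are illustrative assumptions.

// Illustrative only: the wiring the topological loop performs for one edge of the graph.
void wire(OutputReceiver readOutputReceiver, ParDoOperation parDoOperation) {
    // The downstream operation becomes one of the upstream receiver's outputs, exactly as in
    // ((OutputReceiverNode) predecessor).getOutputReceiver().addOutput((Receiver) node.getOperation())
    readOutputReceiver.addOutput((Receiver) parDoOperation);
}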

Aggregations

InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode): 10 uses
Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node): 10 uses
OperationNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode): 10 uses
ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode): 10 uses
ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation): 7 uses
Test (org.junit.Test): 6 uses
OutputReceiverNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode): 4 uses
ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 3 uses
FlattenOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation): 3 uses
ReadOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation): 3 uses
WriteOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation): 3 uses
ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction): 2 uses
ArrayList (java.util.ArrayList): 2 uses
DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions): 2 uses
Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge): 2 uses
MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge): 2 uses
ExecutableStageNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutableStageNode): 2 uses
FetchAndFilterStreamingSideInputsNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode): 2 uses
RegisterRequestNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.RegisterRequestNode): 2 uses
RemoteGrpcPortNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode): 2 uses