Search in sources :

Example 6 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.

In class IntrinsicMapTaskExecutorFactory, method createParDoOperation.

/**
 * Builds an {@link OperationNode} wrapping a {@link ParDoOperation} for the ParDo instruction held by
 * {@code node}.
 *
 * <p>The main output tag is derived from the first entry of the instruction's multi-output infos.
 * Successors of {@code node} are numbered from zero in the iteration order of
 * {@code network.successors(node)}; the tag of every edge connecting {@code node} to a successor is
 * mapped to that successor's number.
 *
 * @param network graph queried for the successors of {@code node} and the edges leading to them
 * @param node node carrying the ParDo instruction
 * @param options forwarded to the ParDoFn factory
 * @param executionContext forwarded to the ParDoFn factory
 * @param operationContext forwarded to the ParDoFn factory and to the created operation
 * @return a node created from a new {@link ParDoOperation}
 * @throws Exception if any of the invoked helpers or the factory throws
 */
private OperationNode createParDoOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    ParDoInstruction parDo = node.getParallelInstruction().getParDo();
    TupleTag<?> mainOutputTag = tupleTag(parDo.getMultiOutputInfos().get(0));

    // All edges that lead to the same successor share that successor's index.
    ImmutableMap.Builder<TupleTag<?>, Integer> receiverIndexByTag = ImmutableMap.builder();
    int receiverIndex = 0;
    for (Node successor : network.successors(node)) {
        for (Edge edge : network.edgesConnecting(node, successor)) {
            MultiOutputInfoEdge outputEdge = (MultiOutputInfoEdge) edge;
            receiverIndexByTag.put(tupleTag(outputEdge.getMultiOutputInfo()), receiverIndex);
        }
        receiverIndex++;
    }

    CloudObject userFnSpec = CloudObject.fromSpec(parDo.getUserFn());
    ParDoFn fn = parDoFnFactory.create(options, userFnSpec, parDo.getSideInputs(), mainOutputTag, receiverIndexByTag.build(), executionContext, operationContext);
    ParDoOperation operation = new ParDoOperation(fn, getOutputReceivers(network, node), operationContext);
    return OperationNode.create(operation);
}
Also used : InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) TupleTag(org.apache.beam.sdk.values.TupleTag) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)

Example 7 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.

In class IntrinsicMapTaskExecutorFactoryTest, method testCreatePartialGroupByKeyOperation.

/**
 * Checks that a partial-group-by-key instruction node is transformed into an {@link OperationNode}
 * whose operation is an unstarted {@link ParDoOperation} with a single receiver that has no
 * receivers of its own registered.
 */
@Test
public void testCreatePartialGroupByKeyOperation() throws Exception {
    final int producerIndex = 1;
    final int producerOutputNum = 2;
    ParallelInstructionNode pgbkNode = ParallelInstructionNode.create(createPartialGroupByKeyInstruction(producerIndex, producerOutputNum), ExecutionLocation.UNKNOWN);

    // Stub the network: the instruction has exactly one successor, built from its first output.
    Node receiverNode = IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet).apply(InstructionOutputNode.create(pgbkNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
    when(network.successors(pgbkNode)).thenReturn(ImmutableSet.<Node>of(receiverNode));
    when(network.outDegree(pgbkNode)).thenReturn(1);

    Node result = mapTaskExecutorFactory.createOperationTransformForParallelInstructionNodes(STAGE, network, PipelineOptionsFactory.create(), readerRegistry, sinkRegistry, BatchModeExecutionContext.forTesting(options, counterSet, "testStage")).apply(pgbkNode);

    assertThat(result, instanceOf(OperationNode.class));
    OperationNode resultAsOperationNode = (OperationNode) result;
    assertThat(resultAsOperationNode.getOperation(), instanceOf(ParDoOperation.class));

    ParDoOperation pgbk = (ParDoOperation) resultAsOperationNode.getOperation();
    assertEquals(1, pgbk.receivers.length);
    assertEquals(0, pgbk.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, pgbk.initializationState);
}
Also used : OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Test(org.junit.Test)

Example 8 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.

In class IntrinsicMapTaskExecutorFactoryTest, method testCreateParDoOperation.

/**
 * Checks that a ParDo instruction node is transformed into an {@link OperationNode} whose operation
 * is an unstarted {@link ParDoOperation} with a single receiver that has no receivers of its own
 * registered.
 */
@Test
public void testCreateParDoOperation() throws Exception {
    final int producerIndex = 1;
    final int producerOutputNum = 2;
    BatchModeExecutionContext executionContext = BatchModeExecutionContext.forTesting(options, counterSet, "testStage");
    ParallelInstructionNode parDoNode = ParallelInstructionNode.create(createParDoInstruction(producerIndex, producerOutputNum, "DoFn"), ExecutionLocation.UNKNOWN);
    Node receiverNode = IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet).apply(InstructionOutputNode.create(parDoNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));

    // Stub the network: one successor, reached through one edge built from the first multi-output info.
    when(network.successors(parDoNode)).thenReturn(ImmutableSet.of(receiverNode));
    when(network.outDegree(parDoNode)).thenReturn(1);
    when(network.edgesConnecting(parDoNode, receiverNode)).thenReturn(ImmutableSet.<Edge>of(MultiOutputInfoEdge.create(parDoNode.getParallelInstruction().getParDo().getMultiOutputInfos().get(0))));

    Node result = mapTaskExecutorFactory.createOperationTransformForParallelInstructionNodes(STAGE, network, options, readerRegistry, sinkRegistry, executionContext).apply(parDoNode);

    assertThat(result, instanceOf(OperationNode.class));
    OperationNode resultAsOperationNode = (OperationNode) result;
    assertThat(resultAsOperationNode.getOperation(), instanceOf(ParDoOperation.class));

    ParDoOperation parDo = (ParDoOperation) resultAsOperationNode.getOperation();
    assertEquals(1, parDo.receivers.length);
    assertEquals(0, parDo.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, parDo.initializationState);
}
Also used : OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Test(org.junit.Test)

Example 9 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.

In class IntrinsicMapTaskExecutorFactoryTest, method testCreatePartialGroupByKeyOperationWithCombine.

/**
 * Checks that a partial-group-by-key instruction carrying a serialized value-combining function is
 * transformed into an {@link OperationNode} whose operation is an unstarted {@link ParDoOperation}
 * with a single receiver that has no receivers of its own registered.
 */
@Test
public void testCreatePartialGroupByKeyOperationWithCombine() throws Exception {
    final int producerIndex = 1;
    final int producerOutputNum = 2;
    ParallelInstruction pgbkInstruction = createPartialGroupByKeyInstruction(producerIndex, producerOutputNum);

    // Attach a serialized integer-sum combine fn to the instruction as its value-combining fn.
    AppliedCombineFn<?, ?, ?, ?> appliedSum = AppliedCombineFn.withInputCoder(Sum.ofIntegers(), CoderRegistry.createDefault(), KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));
    CloudObject combineFnSpec = CloudObject.forClassName("CombineFn");
    addString(combineFnSpec, PropertyNames.SERIALIZED_FN, byteArrayToJsonString(serializeToByteArray(appliedSum)));
    pgbkInstruction.getPartialGroupByKey().setValueCombiningFn(combineFnSpec);

    ParallelInstructionNode pgbkNode = ParallelInstructionNode.create(pgbkInstruction, ExecutionLocation.UNKNOWN);

    // Stub the network: the instruction has exactly one successor, built from its first output.
    Node receiverNode = IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet).apply(InstructionOutputNode.create(pgbkNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
    when(network.successors(pgbkNode)).thenReturn(ImmutableSet.<Node>of(receiverNode));
    when(network.outDegree(pgbkNode)).thenReturn(1);

    Node result = mapTaskExecutorFactory.createOperationTransformForParallelInstructionNodes(STAGE, network, options, readerRegistry, sinkRegistry, BatchModeExecutionContext.forTesting(options, counterSet, "testStage")).apply(pgbkNode);

    assertThat(result, instanceOf(OperationNode.class));
    OperationNode resultAsOperationNode = (OperationNode) result;
    assertThat(resultAsOperationNode.getOperation(), instanceOf(ParDoOperation.class));

    ParDoOperation pgbk = (ParDoOperation) resultAsOperationNode.getOperation();
    assertEquals(1, pgbk.receivers.length);
    assertEquals(0, pgbk.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, pgbk.initializationState);
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Test(org.junit.Test)

Example 10 with OperationNode

Use of org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode in the Apache Beam project.

In class IntrinsicMapTaskExecutorFactoryTest, method testCreateReadOperation.

/**
 * Checks that a read instruction node is transformed into an {@link OperationNode} whose operation
 * is an unstarted {@link ReadOperation} backed by a {@code ReaderFactoryTest.TestReader}, and that
 * the expected counters are reported to the update extractor.
 */
@Test
public void testCreateReadOperation() throws Exception {
    ParallelInstructionNode readNode = ParallelInstructionNode.create(createReadInstruction("Read"), ExecutionLocation.UNKNOWN);

    // Stub the network: the instruction has exactly one successor, built from its first output.
    Node receiverNode = IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet).apply(InstructionOutputNode.create(readNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
    when(network.successors(readNode)).thenReturn(ImmutableSet.<Node>of(receiverNode));
    when(network.outDegree(readNode)).thenReturn(1);

    Node result = mapTaskExecutorFactory.createOperationTransformForParallelInstructionNodes(STAGE, network, PipelineOptionsFactory.create(), readerRegistry, sinkRegistry, BatchModeExecutionContext.forTesting(options, counterSet, "testStage")).apply(readNode);

    assertThat(result, instanceOf(OperationNode.class));
    OperationNode resultAsOperationNode = (OperationNode) result;
    assertThat(resultAsOperationNode.getOperation(), instanceOf(ReadOperation.class));

    ReadOperation read = (ReadOperation) resultAsOperationNode.getOperation();
    assertEquals(1, read.receivers.length);
    assertEquals(0, read.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, read.initializationState);
    assertThat(read.reader, instanceOf(ReaderFactoryTest.TestReader.class));

    // Only the output counters and the read byte count may have been reported.
    counterSet.extractUpdates(false, updateExtractor);
    verifyOutputCounters(updateExtractor, "read_output_name");
    verify(updateExtractor).longSum(eq(named("Read-ByteCount")), anyBoolean(), anyLong());
    verifyNoMoreInteractions(updateExtractor);
}
Also used : OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Test(org.junit.Test)

Aggregations

InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)10 Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node)10 OperationNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode)10 ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)10 ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation)7 Test (org.junit.Test)6 OutputReceiverNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode)4 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)3 FlattenOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation)3 ReadOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation)3 WriteOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation)3 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)2 ArrayList (java.util.ArrayList)2 DataflowPipelineDebugOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions)2 Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)2 MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)2 ExecutableStageNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutableStageNode)2 FetchAndFilterStreamingSideInputsNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode)2 RegisterRequestNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.RegisterRequestNode)2 RemoteGrpcPortNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode)2