Search in sources :

Example 1 with ParDoOperation

use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

the class BeamFnMapTaskExecutorFactory method createParDoOperation.

private OperationNode createParDoOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    ParallelInstruction instruction = node.getParallelInstruction();
    ParDoInstruction parDo = instruction.getParDo();
    TupleTag<?> mainOutputTag = tupleTag(parDo.getMultiOutputInfos().get(0));
    ImmutableMap.Builder<TupleTag<?>, Integer> outputTagsToReceiverIndicesBuilder = ImmutableMap.builder();
    int successorOffset = 0;
    for (Node successor : network.successors(node)) {
        for (Edge edge : network.edgesConnecting(node, successor)) {
            outputTagsToReceiverIndicesBuilder.put(tupleTag(((MultiOutputInfoEdge) edge).getMultiOutputInfo()), successorOffset);
        }
        successorOffset += 1;
    }
    ParDoFn fn = parDoFnFactory.create(options, CloudObject.fromSpec(parDo.getUserFn()), parDo.getSideInputs(), mainOutputTag, outputTagsToReceiverIndicesBuilder.build(), executionContext, operationContext);
    OutputReceiver[] receivers = getOutputReceivers(network, node);
    return OperationNode.create(new ParDoOperation(fn, receivers, operationContext));
}
Also used : RegisterRequestNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RegisterRequestNode) FetchAndFilterStreamingSideInputsNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ExecutableStageNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ExecutableStageNode) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) TupleTag(org.apache.beam.sdk.values.TupleTag) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)

Example 2 with ParDoOperation

use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

the class IntrinsicMapTaskExecutorFactoryTest method testCreateMapTaskExecutor.

@Test
public void testCreateMapTaskExecutor() throws Exception {
    List<ParallelInstruction> instructions = Arrays.asList(createReadInstruction("Read"), createParDoInstruction(0, 0, "DoFn1"), createParDoInstruction(0, 0, "DoFnWithContext"), createFlattenInstruction(1, 0, 2, 0, "Flatten"), createWriteInstruction(3, 0, "Write"));
    MapTask mapTask = new MapTask();
    mapTask.setStageName(STAGE);
    mapTask.setSystemName("systemName");
    mapTask.setInstructions(instructions);
    mapTask.setFactory(Transport.getJsonFactory());
    try (DataflowMapTaskExecutor executor = mapTaskExecutorFactory.create(null, /* beamFnControlClientHandler */
    null, /* GrpcFnServer<GrpcDataService> */
    null, /* ApiServiceDescriptor */
    null, /* GrpcFnServer<GrpcStateService> */
    mapTaskToNetwork.apply(mapTask), options, STAGE, readerRegistry, sinkRegistry, BatchModeExecutionContext.forTesting(options, counterSet, "testStage"), counterSet, idGenerator)) {
        // Safe covariant cast not expressible without rawtypes.
        @SuppressWarnings({ // TODO(https://issues.apache.org/jira/browse/BEAM-10556)
        "rawtypes", "unchecked" }) List<Object> operations = (List) executor.operations;
        assertThat(operations, hasItems(instanceOf(ReadOperation.class), instanceOf(ParDoOperation.class), instanceOf(ParDoOperation.class), instanceOf(FlattenOperation.class), instanceOf(WriteOperation.class)));
        // Verify that the inputs are attached.
        ReadOperation readOperation = Iterables.getOnlyElement(Iterables.filter(operations, ReadOperation.class));
        assertEquals(2, readOperation.receivers[0].getReceiverCount());
        FlattenOperation flattenOperation = Iterables.getOnlyElement(Iterables.filter(operations, FlattenOperation.class));
        for (ParDoOperation operation : Iterables.filter(operations, ParDoOperation.class)) {
            assertSame(flattenOperation, operation.receivers[0].getOnlyReceiver());
        }
        WriteOperation writeOperation = Iterables.getOnlyElement(Iterables.filter(operations, WriteOperation.class));
        assertSame(writeOperation, flattenOperation.receivers[0].getOnlyReceiver());
    }
    @SuppressWarnings("unchecked") Counter<Long, ?> otherMsecCounter = (Counter<Long, ?>) counterSet.getExistingCounter("test-other-msecs");
    // "other" state only got created upon MapTaskExecutor.execute().
    assertNull(otherMsecCounter);
    counterSet.extractUpdates(false, updateExtractor);
    verifyOutputCounters(updateExtractor, "read_output_name", "DoFn1_output", "DoFnWithContext_output", "flatten_output_name");
    verify(updateExtractor).longSum(eq(named("Read-ByteCount")), anyBoolean(), anyLong());
    verify(updateExtractor).longSum(eq(named("Write-ByteCount")), anyBoolean(), anyLong());
    verifyNoMoreInteractions(updateExtractor);
}
Also used : ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) FlattenOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) WriteOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation) MapTask(com.google.api.services.dataflow.model.MapTask) Matchers.anyLong(org.mockito.Matchers.anyLong) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 3 with ParDoOperation

use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

the class BeamFnMapTaskExecutorFactory method createPartialGroupByKeyOperation.

<K> OperationNode createPartialGroupByKeyOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    ParallelInstruction instruction = node.getParallelInstruction();
    PartialGroupByKeyInstruction pgbk = instruction.getPartialGroupByKey();
    OutputReceiver[] receivers = getOutputReceivers(network, node);
    Coder<?> windowedCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(pgbk.getInputElementCodec()));
    if (!(windowedCoder instanceof WindowedValueCoder)) {
        throw new IllegalArgumentException(String.format("unexpected kind of input coder for PartialGroupByKeyOperation: %s", windowedCoder));
    }
    Coder<?> elemCoder = ((WindowedValueCoder<?>) windowedCoder).getValueCoder();
    if (!(elemCoder instanceof KvCoder)) {
        throw new IllegalArgumentException(String.format("unexpected kind of input element coder for PartialGroupByKeyOperation: %s", elemCoder));
    }
    @SuppressWarnings("unchecked") KvCoder<K, ?> keyedElementCoder = (KvCoder<K, ?>) elemCoder;
    CloudObject cloudUserFn = pgbk.getValueCombiningFn() != null ? CloudObject.fromSpec(pgbk.getValueCombiningFn()) : null;
    ParDoFn fn = PartialGroupByKeyParDoFns.create(options, keyedElementCoder, cloudUserFn, pgbk.getSideInputs(), Arrays.<Receiver>asList(receivers), executionContext, operationContext);
    return OperationNode.create(new ParDoOperation(fn, receivers, operationContext));
}
Also used : OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) KvCoder(org.apache.beam.sdk.coders.KvCoder) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PartialGroupByKeyInstruction(com.google.api.services.dataflow.model.PartialGroupByKeyInstruction)

Example 4 with ParDoOperation

use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

the class IntrinsicMapTaskExecutorFactory method createParDoOperation.

private OperationNode createParDoOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    ParallelInstruction instruction = node.getParallelInstruction();
    ParDoInstruction parDo = instruction.getParDo();
    TupleTag<?> mainOutputTag = tupleTag(parDo.getMultiOutputInfos().get(0));
    ImmutableMap.Builder<TupleTag<?>, Integer> outputTagsToReceiverIndicesBuilder = ImmutableMap.builder();
    int successorOffset = 0;
    for (Node successor : network.successors(node)) {
        for (Edge edge : network.edgesConnecting(node, successor)) {
            outputTagsToReceiverIndicesBuilder.put(tupleTag(((MultiOutputInfoEdge) edge).getMultiOutputInfo()), successorOffset);
        }
        successorOffset += 1;
    }
    ParDoFn fn = parDoFnFactory.create(options, CloudObject.fromSpec(parDo.getUserFn()), parDo.getSideInputs(), mainOutputTag, outputTagsToReceiverIndicesBuilder.build(), executionContext, operationContext);
    OutputReceiver[] receivers = getOutputReceivers(network, node);
    return OperationNode.create(new ParDoOperation(fn, receivers, operationContext));
}
Also used : InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) OutputReceiverNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode) TupleTag(org.apache.beam.sdk.values.TupleTag) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)

Example 5 with ParDoOperation

use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

the class IntrinsicMapTaskExecutorFactory method createPartialGroupByKeyOperation.

<K> OperationNode createPartialGroupByKeyOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    ParallelInstruction instruction = node.getParallelInstruction();
    PartialGroupByKeyInstruction pgbk = instruction.getPartialGroupByKey();
    OutputReceiver[] receivers = getOutputReceivers(network, node);
    Coder<?> windowedCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(pgbk.getInputElementCodec()));
    if (!(windowedCoder instanceof WindowedValueCoder)) {
        throw new IllegalArgumentException(String.format("unexpected kind of input coder for PartialGroupByKeyOperation: %s", windowedCoder));
    }
    Coder<?> elemCoder = ((WindowedValueCoder<?>) windowedCoder).getValueCoder();
    if (!(elemCoder instanceof KvCoder)) {
        throw new IllegalArgumentException(String.format("unexpected kind of input element coder for PartialGroupByKeyOperation: %s", elemCoder));
    }
    @SuppressWarnings("unchecked") KvCoder<K, ?> keyedElementCoder = (KvCoder<K, ?>) elemCoder;
    CloudObject cloudUserFn = pgbk.getValueCombiningFn() != null ? CloudObject.fromSpec(pgbk.getValueCombiningFn()) : null;
    ParDoFn fn = PartialGroupByKeyParDoFns.create(options, keyedElementCoder, cloudUserFn, pgbk.getSideInputs(), Arrays.<Receiver>asList(receivers), executionContext, operationContext);
    return OperationNode.create(new ParDoOperation(fn, receivers, operationContext));
}
Also used : OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) KvCoder(org.apache.beam.sdk.coders.KvCoder) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PartialGroupByKeyInstruction(com.google.api.services.dataflow.model.PartialGroupByKeyInstruction)

Aggregations

ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation)9 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)6 InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)5 Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node)5 OperationNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode)5 ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)5 OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver)5 Test (org.junit.Test)5 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)4 ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn)4 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)2 PartialGroupByKeyInstruction (com.google.api.services.dataflow.model.PartialGroupByKeyInstruction)2 Counter (org.apache.beam.runners.dataflow.worker.counters.Counter)2 Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)2 MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)2 OutputReceiverNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode)2 ReadOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation)2 KvCoder (org.apache.beam.sdk.coders.KvCoder)2 WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder)2 TupleTag (org.apache.beam.sdk.values.TupleTag)2