Search in sources :

Example 21 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class IntrinsicMapTaskExecutorFactoryTest method testCreateMapTaskExecutor.

/**
 * Creates a map task executor from five instructions (read, two ParDos, flatten, write) and
 * checks that the resulting operations are wired together and that the expected counters exist
 * before the executor is ever executed.
 */
@Test
public void testCreateMapTaskExecutor() throws Exception {
    // Instructions are created in list order; the assertions below pin the expected wiring.
    ParallelInstruction read = createReadInstruction("Read");
    ParallelInstruction firstParDo = createParDoInstruction(0, 0, "DoFn1");
    ParallelInstruction secondParDo = createParDoInstruction(0, 0, "DoFnWithContext");
    ParallelInstruction flatten = createFlattenInstruction(1, 0, 2, 0, "Flatten");
    ParallelInstruction write = createWriteInstruction(3, 0, "Write");
    List<ParallelInstruction> parallelInstructions =
        Arrays.asList(read, firstParDo, secondParDo, flatten, write);

    MapTask task = new MapTask();
    task.setStageName(STAGE);
    task.setSystemName("systemName");
    task.setInstructions(parallelInstructions);
    task.setFactory(Transport.getJsonFactory());

    try (DataflowMapTaskExecutor mapTaskExecutor =
        mapTaskExecutorFactory.create(
            null /* beamFnControlClientHandler */,
            null /* GrpcFnServer<GrpcDataService> */,
            null /* ApiServiceDescriptor */,
            null /* GrpcFnServer<GrpcStateService> */,
            mapTaskToNetwork.apply(task),
            options,
            STAGE,
            readerRegistry,
            sinkRegistry,
            BatchModeExecutionContext.forTesting(options, counterSet, "testStage"),
            counterSet,
            idGenerator)) {
        // Safe covariant cast not expressible without rawtypes.
        // TODO(https://issues.apache.org/jira/browse/BEAM-10556)
        @SuppressWarnings({"rawtypes", "unchecked"})
        List<Object> createdOperations = (List) mapTaskExecutor.operations;
        assertThat(
            createdOperations,
            hasItems(
                instanceOf(ReadOperation.class),
                instanceOf(ParDoOperation.class),
                instanceOf(ParDoOperation.class),
                instanceOf(FlattenOperation.class),
                instanceOf(WriteOperation.class)));

        // The single read operation must feed two receivers.
        ReadOperation readOp =
            Iterables.getOnlyElement(Iterables.filter(createdOperations, ReadOperation.class));
        assertEquals(2, readOp.receivers[0].getReceiverCount());

        // Every ParDo must feed the single flatten operation.
        FlattenOperation flattenOp =
            Iterables.getOnlyElement(Iterables.filter(createdOperations, FlattenOperation.class));
        for (ParDoOperation parDoOp : Iterables.filter(createdOperations, ParDoOperation.class)) {
            assertSame(flattenOp, parDoOp.receivers[0].getOnlyReceiver());
        }

        // The flatten operation must feed the single write operation.
        WriteOperation writeOp =
            Iterables.getOnlyElement(Iterables.filter(createdOperations, WriteOperation.class));
        assertSame(writeOp, flattenOp.receivers[0].getOnlyReceiver());
    }

    // "other" state only got created upon MapTaskExecutor.execute().
    @SuppressWarnings("unchecked")
    Counter<Long, ?> otherStateCounter =
        (Counter<Long, ?>) counterSet.getExistingCounter("test-other-msecs");
    assertNull(otherStateCounter);

    // Only the output counters and the read/write byte counters may have been reported.
    counterSet.extractUpdates(false, updateExtractor);
    verifyOutputCounters(
        updateExtractor,
        "read_output_name",
        "DoFn1_output",
        "DoFnWithContext_output",
        "flatten_output_name");
    verify(updateExtractor).longSum(eq(named("Read-ByteCount")), anyBoolean(), anyLong());
    verify(updateExtractor).longSum(eq(named("Write-ByteCount")), anyBoolean(), anyLong());
    verifyNoMoreInteractions(updateExtractor);
}
Also used : ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) FlattenOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.FlattenOperation) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) WriteOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.WriteOperation) MapTask(com.google.api.services.dataflow.model.MapTask) Matchers.anyLong(org.mockito.Matchers.anyLong) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) List(java.util.List) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 22 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class IntrinsicMapTaskExecutorFactory method createOperationTransformForParallelInstructionNodes.

/**
 * Builds a node transform that turns each {@link ParallelInstructionNode} into the node produced
 * for its {@link ParallelInstruction}.
 *
 * <p>The instruction kind is picked by the first non-null payload, checked in this order: read,
 * write, ParDo, partial group-by-key, flatten. An instruction with none of these set is rejected
 * with an {@link IllegalArgumentException}. Any exception raised while creating the operation
 * context or the operation (including that rejection) is rethrown wrapped in a
 * {@link RuntimeException}.
 */
Function<Node, Node> createOperationTransformForParallelInstructionNodes(
    final String stageName,
    final Network<Node, Edge> network,
    final PipelineOptions options,
    final ReaderFactory readerFactory,
    final SinkFactory sinkFactory,
    final DataflowExecutionContext<?> executionContext) {
    return new TypeSafeNodeFunction<ParallelInstructionNode>(ParallelInstructionNode.class) {

        @Override
        public Node typedApply(ParallelInstructionNode node) {
            ParallelInstruction instruction = node.getParallelInstruction();
            NameContext names =
                NameContext.create(
                    stageName,
                    instruction.getOriginalName(),
                    instruction.getSystemName(),
                    instruction.getName());
            try {
                DataflowOperationContext operationContext =
                    executionContext.createOperationContext(names);
                if (instruction.getRead() != null) {
                    return createReadOperation(
                        network, node, options, readerFactory, executionContext, operationContext);
                }
                if (instruction.getWrite() != null) {
                    return createWriteOperation(
                        node, options, sinkFactory, executionContext, operationContext);
                }
                if (instruction.getParDo() != null) {
                    return createParDoOperation(
                        network, node, options, executionContext, operationContext);
                }
                if (instruction.getPartialGroupByKey() != null) {
                    return createPartialGroupByKeyOperation(
                        network, node, options, executionContext, operationContext);
                }
                if (instruction.getFlatten() != null) {
                    return createFlattenOperation(network, node, operationContext);
                }
                // No known payload is set on this instruction.
                throw new IllegalArgumentException(
                    String.format("Unexpected instruction: %s", instruction));
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    };
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) TypeSafeNodeFunction(org.apache.beam.runners.dataflow.worker.graph.Networks.TypeSafeNodeFunction)

Example 23 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class IntrinsicMapTaskExecutorFactory method createWriteOperation.

/**
 * Creates the {@link OperationNode} wrapping a {@link WriteOperation} for the write instruction
 * carried by {@code node}.
 *
 * <p>The coder is decoded from the sink's codec spec and the sink itself is created by
 * {@code sinkFactory} from the sink's spec. The write operation is created with
 * {@code EMPTY_OUTPUT_RECEIVER_ARRAY} as its receivers.
 */
OperationNode createWriteOperation(
    ParallelInstructionNode node,
    PipelineOptions options,
    SinkFactory sinkFactory,
    DataflowExecutionContext executionContext,
    DataflowOperationContext context)
    throws Exception {
    WriteInstruction writeInstruction = node.getParallelInstruction().getWrite();

    // Decode the element coder from the sink's codec spec.
    CloudObject codecSpec = CloudObject.fromSpec(writeInstruction.getSink().getCodec());
    Coder<?> elementCoder = CloudObjects.coderFromCloudObject(codecSpec);

    // Create the sink from the sink's own spec.
    CloudObject sinkSpec = CloudObject.fromSpec(writeInstruction.getSink().getSpec());
    Sink<?> createdSink =
        sinkFactory.create(sinkSpec, elementCoder, options, executionContext, context);

    WriteOperation writeOperation =
        WriteOperation.create(createdSink, EMPTY_OUTPUT_RECEIVER_ARRAY, context);
    return OperationNode.create(writeOperation);
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) WriteInstruction(com.google.api.services.dataflow.model.WriteInstruction)

Example 24 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class FixMultiOutputInfosOnParDoInstructionsTest method createMapTaskWithParDo.

/**
 * Builds a {@link MapTask} holding a single ParDo instruction with {@code numOutputs} declared
 * outputs and one {@link MultiOutputInfo} per entry of {@code tags}, in the given order.
 */
private static MapTask createMapTaskWithParDo(int numOutputs, String... tags) {
    // One output info per tag, preserving the caller's order.
    List<MultiOutputInfo> outputInfos = new ArrayList<>(tags.length);
    for (int i = 0; i < tags.length; i++) {
        MultiOutputInfo info = new MultiOutputInfo();
        info.setTag(tags[i]);
        outputInfos.add(info);
    }

    ParDoInstruction parDo = new ParDoInstruction();
    parDo.setNumOutputs(numOutputs);
    parDo.setMultiOutputInfos(outputInfos);

    ParallelInstruction parallelInstruction = new ParallelInstruction();
    parallelInstruction.setParDo(parDo);

    MapTask result = new MapTask();
    result.setInstructions(ImmutableList.of(parallelInstruction));
    return result;
}
Also used : ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) MultiOutputInfo(com.google.api.services.dataflow.model.MultiOutputInfo) MapTask(com.google.api.services.dataflow.model.MapTask) ArrayList(java.util.ArrayList)

Example 25 with ParallelInstruction

use of com.google.api.services.dataflow.model.ParallelInstruction in project beam by apache.

the class DeduceNodeLocationsFunctionTest method assertNodesIdenticalExceptForExecutionLocation.

/**
 * Asserts that both nodes are {@link ParallelInstructionNode}s carrying equal
 * {@link ParallelInstruction}s. Only the instructions are compared, so the nodes may differ in
 * ExecutionLocation.
 */
private void assertNodesIdenticalExceptForExecutionLocation(Node expected, Node actual) {
    assertThat(expected, instanceOf(ParallelInstructionNode.class));
    assertThat(actual, instanceOf(ParallelInstructionNode.class));
    ParallelInstructionNode expectedNode = (ParallelInstructionNode) expected;
    ParallelInstructionNode actualNode = (ParallelInstructionNode) actual;
    assertEquals(expectedNode.getParallelInstruction(), actualNode.getParallelInstruction());
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)

Aggregations

ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)73 Test (org.junit.Test)39 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)27 ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode)26 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)24 Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node)22 InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode)21 Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)20 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)18 ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction)17 DefaultEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)17 MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge)16 Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString)12 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)12 InstructionInput (com.google.api.services.dataflow.model.InstructionInput)11 MapTask (com.google.api.services.dataflow.model.MapTask)11 AtomicLong (java.util.concurrent.atomic.AtomicLong)11 DataflowCounterUpdateExtractor.splitIntToLong (org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong)11 WorkItemCommitRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItemCommitRequest)11 UnsignedLong (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong)11