
Example 6 with ParDoOperation

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

From class IntrinsicMapTaskExecutorFactoryTest, method testCreatePartialGroupByKeyOperation.

@Test
public void testCreatePartialGroupByKeyOperation() throws Exception {
    int producerIndex = 1;
    int producerOutputNum = 2;
    ParallelInstructionNode instructionNode =
        ParallelInstructionNode.create(
            createPartialGroupByKeyInstruction(producerIndex, producerOutputNum),
            ExecutionLocation.UNKNOWN);
    when(network.successors(instructionNode))
        .thenReturn(
            ImmutableSet.<Node>of(
                IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
                    .apply(
                        InstructionOutputNode.create(
                            instructionNode.getParallelInstruction().getOutputs().get(0),
                            PCOLLECTION_ID))));
    when(network.outDegree(instructionNode)).thenReturn(1);
    Node operationNode =
        mapTaskExecutorFactory
            .createOperationTransformForParallelInstructionNodes(
                STAGE,
                network,
                PipelineOptionsFactory.create(),
                readerRegistry,
                sinkRegistry,
                BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
            .apply(instructionNode);
    assertThat(operationNode, instanceOf(OperationNode.class));
    assertThat(((OperationNode) operationNode).getOperation(), instanceOf(ParDoOperation.class));
    ParDoOperation pgbkOperation = (ParDoOperation) ((OperationNode) operationNode).getOperation();
    assertEquals(1, pgbkOperation.receivers.length);
    assertEquals(0, pgbkOperation.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, pgbkOperation.initializationState);
}
Also used : OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Test(org.junit.Test)
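
All of these factory tests share the same setup pattern: the instruction graph is a Mockito mock, and only the queries the factory actually issues (the successors of the instruction node and its out-degree) are stubbed. The following is a minimal sketch of that pattern, assuming plain Guava and Mockito on the classpath (the worker code may use Beam's vendored Guava copy instead); FakeNode is a hypothetical stand-in for the real ParallelInstructionNode/OperationNode types.

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.google.common.collect.ImmutableSet;
import com.google.common.graph.Network;

// Hypothetical node type standing in for the worker's Node hierarchy.
class FakeNode {}

public class GraphStubbingSketch {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) {
        // Mock the instruction graph; only the calls the factory makes are stubbed.
        Network<FakeNode, String> network = (Network<FakeNode, String>) mock(Network.class);
        FakeNode instruction = new FakeNode();
        FakeNode outputReceiver = new FakeNode();

        // The factory walks from the instruction node to its single output receiver.
        when(network.successors(instruction)).thenReturn(ImmutableSet.of(outputReceiver));
        when(network.outDegree(instruction)).thenReturn(1);

        System.out.println(network.successors(instruction)); // the stubbed successor set
        System.out.println(network.outDegree(instruction));  // 1
    }
}

A transform such as createOperationTransformForParallelInstructionNodes only ever sees these stubbed answers, which is why the tests above never build a full MapTask graph.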

Example 7 with ParDoOperation

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

From class IntrinsicMapTaskExecutorFactoryTest, method testCreateParDoOperation.

@Test
public void testCreateParDoOperation() throws Exception {
    int producerIndex = 1;
    int producerOutputNum = 2;
    BatchModeExecutionContext context = BatchModeExecutionContext.forTesting(options, counterSet, "testStage");
    ParallelInstructionNode instructionNode =
        ParallelInstructionNode.create(
            createParDoInstruction(producerIndex, producerOutputNum, "DoFn"),
            ExecutionLocation.UNKNOWN);
    Node outputReceiverNode =
        IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
            .apply(
                InstructionOutputNode.create(
                    instructionNode.getParallelInstruction().getOutputs().get(0), PCOLLECTION_ID));
    when(network.successors(instructionNode)).thenReturn(ImmutableSet.of(outputReceiverNode));
    when(network.outDegree(instructionNode)).thenReturn(1);
    when(network.edgesConnecting(instructionNode, outputReceiverNode))
        .thenReturn(
            ImmutableSet.<Edge>of(
                MultiOutputInfoEdge.create(
                    instructionNode.getParallelInstruction().getParDo().getMultiOutputInfos().get(0))));
    Node operationNode =
        mapTaskExecutorFactory
            .createOperationTransformForParallelInstructionNodes(
                STAGE, network, options, readerRegistry, sinkRegistry, context)
            .apply(instructionNode);
    assertThat(operationNode, instanceOf(OperationNode.class));
    assertThat(((OperationNode) operationNode).getOperation(), instanceOf(ParDoOperation.class));
    ParDoOperation parDoOperation = (ParDoOperation) ((OperationNode) operationNode).getOperation();
    assertEquals(1, parDoOperation.receivers.length);
    assertEquals(0, parDoOperation.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, parDoOperation.initializationState);
}
Also used : OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Test(org.junit.Test)
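
The helper createParDoInstruction is defined in the test class and is not shown on this page. As a rough illustration only, a ParallelInstruction carrying a ParDo step with one tagged output could be assembled from the public com.google.api.services.dataflow.model builders roughly as below; the names, the tag, and the omitted user-fn payload are hypothetical, not the test's actual values.

import com.google.api.services.dataflow.model.InstructionOutput;
import com.google.api.services.dataflow.model.MultiOutputInfo;
import com.google.api.services.dataflow.model.ParDoInstruction;
import com.google.api.services.dataflow.model.ParallelInstruction;
import java.util.Collections;

public class ParDoInstructionSketch {
    // Builds a ParallelInstruction describing a ParDo step with a single tagged output.
    static ParallelInstruction buildParDoInstruction(String stepName) {
        MultiOutputInfo outputInfo = new MultiOutputInfo();
        outputInfo.setTag("output"); // illustrative tag

        ParDoInstruction parDo = new ParDoInstruction();
        parDo.setMultiOutputInfos(Collections.singletonList(outputInfo));
        // The real helper also attaches a CloudObject user fn (the serialized DoFn);
        // that part is omitted here because its construction is specific to the test.

        InstructionOutput output = new InstructionOutput();
        output.setName(stepName + ".out"); // illustrative output name

        ParallelInstruction instruction = new ParallelInstruction();
        instruction.setName(stepName);
        instruction.setParDo(parDo);
        instruction.setOutputs(Collections.singletonList(output));
        return instruction;
    }
}

The MultiOutputInfo built here is the kind of object MultiOutputInfoEdge.create(...) wraps when the test stubs network.edgesConnecting above.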

Example 8 with ParDoOperation

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

From class IntrinsicMapTaskExecutorFactoryTest, method testCreatePartialGroupByKeyOperationWithCombine.

@Test
public void testCreatePartialGroupByKeyOperationWithCombine() throws Exception {
    int producerIndex = 1;
    int producerOutputNum = 2;
    ParallelInstruction instruction = createPartialGroupByKeyInstruction(producerIndex, producerOutputNum);
    AppliedCombineFn<?, ?, ?, ?> combineFn =
        AppliedCombineFn.withInputCoder(
            Sum.ofIntegers(),
            CoderRegistry.createDefault(),
            KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));
    CloudObject cloudCombineFn = CloudObject.forClassName("CombineFn");
    addString(
        cloudCombineFn,
        PropertyNames.SERIALIZED_FN,
        byteArrayToJsonString(serializeToByteArray(combineFn)));
    instruction.getPartialGroupByKey().setValueCombiningFn(cloudCombineFn);
    ParallelInstructionNode instructionNode =
        ParallelInstructionNode.create(instruction, ExecutionLocation.UNKNOWN);
    when(network.successors(instructionNode))
        .thenReturn(
            ImmutableSet.<Node>of(
                IntrinsicMapTaskExecutorFactory.createOutputReceiversTransform(STAGE, counterSet)
                    .apply(
                        InstructionOutputNode.create(
                            instructionNode.getParallelInstruction().getOutputs().get(0),
                            PCOLLECTION_ID))));
    when(network.outDegree(instructionNode)).thenReturn(1);
    Node operationNode =
        mapTaskExecutorFactory
            .createOperationTransformForParallelInstructionNodes(
                STAGE,
                network,
                options,
                readerRegistry,
                sinkRegistry,
                BatchModeExecutionContext.forTesting(options, counterSet, "testStage"))
            .apply(instructionNode);
    assertThat(operationNode, instanceOf(OperationNode.class));
    assertThat(((OperationNode) operationNode).getOperation(), instanceOf(ParDoOperation.class));
    ParDoOperation pgbkOperation = (ParDoOperation) ((OperationNode) operationNode).getOperation();
    assertEquals(1, pgbkOperation.receivers.length);
    assertEquals(0, pgbkOperation.receivers[0].getReceiverCount());
    assertEquals(Operation.InitializationState.UNSTARTED, pgbkOperation.initializationState);
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) OperationNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Test(org.junit.Test)
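
The step worth calling out in this example is how the CombineFn travels: it is Java-serialized and stored as a JSON-safe string under PropertyNames.SERIALIZED_FN on the CloudObject. A minimal round-trip sketch of that encoding, assuming the test's static imports resolve to the SDK's SerializableUtils and StringUtils helpers:

import java.io.Serializable;

import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.util.SerializableUtils;
import org.apache.beam.sdk.util.StringUtils;

public class SerializedFnRoundTripSketch {
    public static void main(String[] args) {
        // Encode: Java serialization followed by a JSON-safe string encoding, the same
        // shape of payload the test stores under SERIALIZED_FN.
        Serializable combineFn = Sum.ofIntegers();
        byte[] bytes = SerializableUtils.serializeToByteArray(combineFn);
        String jsonSafe = StringUtils.byteArrayToJsonString(bytes);

        // Decode: the worker side reverses both steps to recover the fn.
        byte[] decoded = StringUtils.jsonStringToByteArray(jsonSafe);
        Object roundTripped =
            SerializableUtils.deserializeFromByteArray(decoded, "Sum.ofIntegers combine fn");
        System.out.println(roundTripped.getClass().getName());
    }
}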

Example 9 with ParDoOperation

Use of org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation in project beam by apache.

From class IntrinsicMapTaskExecutorTest, method testPerElementProcessingTimeCounters.

/**
 * Verify counts for the per-element-output-time counter are correct.
 */
@Test
public void testPerElementProcessingTimeCounters() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    options
        .as(DataflowPipelineDebugOptions.class)
        .setExperiments(
            Lists.newArrayList(DataflowElementExecutionTracker.TIME_PER_ELEMENT_EXPERIMENT));
    DataflowExecutionStateTracker stateTracker =
        new DataflowExecutionStateTracker(
            ExecutionStateSampler.newForTest(),
            new TestDataflowExecutionState(
                NameContext.forStage("test-stage"),
                "other",
                null /* requestingStepName */,
                null /* sideInputIndex */,
                null /* metricsContainer */,
                NoopProfileScope.NOOP),
            counterSet,
            options,
            "test-work-item-id");
    NameContext parDoName = nameForStep("s1");
    // Wire a read operation with 3 elements to a ParDoOperation and assert that we count
    // the correct number of elements.
    ReadOperation read =
        ReadOperation.forTest(
            new TestReader("a", "b", "c"),
            new OutputReceiver(),
            TestOperationContext.create(counterSet, nameForStep("s0"), null, stateTracker));
    ParDoOperation parDo =
        new ParDoOperation(
            new NoopParDoFn(),
            new OutputReceiver[0],
            TestOperationContext.create(counterSet, parDoName, null, stateTracker));
    parDo.attachInput(read, 0);
    List<Operation> operations = Lists.newArrayList(read, parDo);
    try (IntrinsicMapTaskExecutor executor = IntrinsicMapTaskExecutor.withSharedCounterSet(operations, counterSet, stateTracker)) {
        executor.execute();
    }
    CounterName counterName = CounterName.named("per-element-processing-time").withOriginalName(parDoName);
    Counter<Long, CounterDistribution> counter = (Counter<Long, CounterDistribution>) counterSet.getExistingCounter(counterName);
    assertThat(counter.getAggregate().getCount(), equalTo(3L));
}
Also used : CounterDistribution(org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution) ReadOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) TestReader(org.apache.beam.runners.dataflow.worker.util.common.worker.ExecutorTestUtils.TestReader) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) TestOutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.TestOutputReceiver) TestDataflowExecutionState(org.apache.beam.runners.dataflow.worker.TestOperationContext.TestDataflowExecutionState) ParDoOperation(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation) Operation(org.apache.beam.runners.dataflow.worker.util.common.worker.Operation) Counter(org.apache.beam.runners.dataflow.worker.counters.Counter) CounterName(org.apache.beam.runners.dataflow.worker.counters.CounterName) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) DataflowExecutionStateTracker(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowExecutionStateTracker) Test(org.junit.Test)
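
The final assertions show how the per-element distribution counter is located. Extracted into a small helper, sketched from the counter APIs this test already uses (the CounterSet type is assumed to be the worker's org.apache.beam.runners.dataflow.worker.counters.CounterSet):

import org.apache.beam.runners.dataflow.worker.counters.Counter;
import org.apache.beam.runners.dataflow.worker.counters.CounterFactory.CounterDistribution;
import org.apache.beam.runners.dataflow.worker.counters.CounterName;
import org.apache.beam.runners.dataflow.worker.counters.CounterSet;
import org.apache.beam.runners.dataflow.worker.counters.NameContext;

public class PerElementTimeCounterSketch {
    // Looks up the per-element-processing-time distribution counter for one step and
    // returns how many elements it recorded, mirroring the assertion in the test above.
    @SuppressWarnings("unchecked")
    static long recordedElementCount(CounterSet counters, NameContext stepName) {
        CounterName counterName =
            CounterName.named("per-element-processing-time").withOriginalName(stepName);
        Counter<Long, CounterDistribution> counter =
            (Counter<Long, CounterDistribution>) counters.getExistingCounter(counterName);
        return counter.getAggregate().getCount();
    }
}

With the executor run as in the test, recordedElementCount(counterSet, parDoName) would evaluate to 3, one sample per element read.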

Aggregations

ParDoOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoOperation): 9 uses
ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction): 6 uses
InstructionOutputNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode): 5 uses
Node (org.apache.beam.runners.dataflow.worker.graph.Nodes.Node): 5 uses
OperationNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OperationNode): 5 uses
ParallelInstructionNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode): 5 uses
OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver): 5 uses
Test (org.junit.Test): 5 uses
CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 4 uses
ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn): 4 uses
ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction): 2 uses
PartialGroupByKeyInstruction (com.google.api.services.dataflow.model.PartialGroupByKeyInstruction): 2 uses
Counter (org.apache.beam.runners.dataflow.worker.counters.Counter): 2 uses
Edge (org.apache.beam.runners.dataflow.worker.graph.Edges.Edge): 2 uses
MultiOutputInfoEdge (org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge): 2 uses
OutputReceiverNode (org.apache.beam.runners.dataflow.worker.graph.Nodes.OutputReceiverNode): 2 uses
ReadOperation (org.apache.beam.runners.dataflow.worker.util.common.worker.ReadOperation): 2 uses
KvCoder (org.apache.beam.sdk.coders.KvCoder): 2 uses
WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder): 2 uses
TupleTag (org.apache.beam.sdk.values.TupleTag): 2 uses