Use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.
From the class IntrinsicMapTaskExecutorFactory, method createParDoOperation.
/**
 * Builds the {@link OperationNode} that wraps a {@link ParDoOperation} for the ParDo instruction
 * carried by {@code node}.
 *
 * <p>Successors of {@code node} in {@code network} are numbered in iteration order; the tag of
 * every edge leading to a successor is mapped to that successor's number, and the resulting map is
 * handed to the {@code parDoFnFactory} together with the tag of the first multi-output info.
 *
 * @param network the instruction graph containing {@code node}
 * @param node the node holding the ParDo instruction
 * @param options pipeline options forwarded to the factory
 * @param executionContext execution context forwarded to the factory
 * @param operationContext operation context forwarded to the factory and the operation
 * @return a node wrapping the newly created {@link ParDoOperation}
 * @throws Exception if creating the {@link ParDoFn} or the receivers fails
 */
private OperationNode createParDoOperation(
    Network<Node, Edge> network,
    ParallelInstructionNode node,
    PipelineOptions options,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  ParDoInstruction parDoInstruction = node.getParallelInstruction().getParDo();

  // The first multi-output info is used as the main output.
  TupleTag<?> mainTag = tupleTag(parDoInstruction.getMultiOutputInfos().get(0));

  ImmutableMap.Builder<TupleTag<?>, Integer> receiverIndexByTag = ImmutableMap.builder();
  int receiverIndex = 0;
  for (Node successor : network.successors(node)) {
    // All edges to the same successor share that successor's receiver index.
    for (Edge edge : network.edgesConnecting(node, successor)) {
      MultiOutputInfoEdge outputEdge = (MultiOutputInfoEdge) edge;
      receiverIndexByTag.put(tupleTag(outputEdge.getMultiOutputInfo()), receiverIndex);
    }
    receiverIndex++;
  }

  CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
  ParDoFn parDoFn =
      parDoFnFactory.create(
          options,
          userFnSpec,
          parDoInstruction.getSideInputs(),
          mainTag,
          receiverIndexByTag.build(),
          executionContext,
          operationContext);

  OutputReceiver[] outputReceivers = getOutputReceivers(network, node);
  ParDoOperation operation = new ParDoOperation(parDoFn, outputReceivers, operationContext);
  return OperationNode.create(operation);
}
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.
From the class IntrinsicMapTaskExecutorFactory, method createReadOperation.
/**
 * Builds the {@link OperationNode} that wraps a {@link ReadOperation} for the read instruction
 * carried by {@code node}.
 *
 * <p>The instruction's source is flattened with {@link CloudSourceUtils#flattenBaseSpecs}, its
 * codec is turned into a {@link Coder}, and {@code readerFactory} is asked to create the
 * {@link NativeReader} from the source spec.
 *
 * @param network the instruction graph containing {@code node}
 * @param node the node holding the read instruction
 * @param options pipeline options forwarded to the reader factory
 * @param readerFactory factory used to create the reader
 * @param executionContext execution context forwarded to the reader factory
 * @param operationContext operation context forwarded to the reader factory and the operation
 * @return a node wrapping the newly created {@link ReadOperation}
 * @throws Exception if the source, coder, reader or receivers cannot be created
 */
OperationNode createReadOperation(
    Network<Node, Edge> network,
    ParallelInstructionNode node,
    PipelineOptions options,
    ReaderFactory readerFactory,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  ReadInstruction readInstruction = node.getParallelInstruction().getRead();
  Source flattenedSource = CloudSourceUtils.flattenBaseSpecs(readInstruction.getSource());

  CloudObject readerSpec = CloudObject.fromSpec(flattenedSource.getSpec());
  CloudObject codecSpec = CloudObject.fromSpec(flattenedSource.getCodec());
  Coder<?> elementCoder = CloudObjects.coderFromCloudObject(codecSpec);

  NativeReader<?> nativeReader =
      readerFactory.create(
          readerSpec, elementCoder, options, executionContext, operationContext);

  return OperationNode.create(
      ReadOperation.create(nativeReader, getOutputReceivers(network, node), operationContext));
}
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.
From the class IntrinsicMapTaskExecutorFactory, method createPartialGroupByKeyOperation.
/**
 * Builds the {@link OperationNode} that wraps a {@link ParDoOperation} performing the partial
 * group-by-key described by the instruction carried by {@code node}.
 *
 * <p>The instruction's input element codec must decode to a {@link WindowedValueCoder} whose value
 * coder is a {@link KvCoder}. The value combining function is optional; when the instruction has
 * none, {@code null} is passed on to {@link PartialGroupByKeyParDoFns}.
 *
 * @param network the instruction graph containing {@code node}
 * @param node the node holding the partial group-by-key instruction
 * @param options pipeline options forwarded to the ParDoFn factory method
 * @param executionContext execution context forwarded to the ParDoFn factory method
 * @param operationContext operation context forwarded to the ParDoFn and the operation
 * @return a node wrapping the newly created {@link ParDoOperation}
 * @throws IllegalArgumentException if the input coder is not a {@link WindowedValueCoder} or its
 *     value coder is not a {@link KvCoder}
 * @throws Exception if the coder, receivers or ParDoFn cannot be created
 */
<K> OperationNode createPartialGroupByKeyOperation(
    Network<Node, Edge> network,
    ParallelInstructionNode node,
    PipelineOptions options,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  PartialGroupByKeyInstruction pgbkInstruction =
      node.getParallelInstruction().getPartialGroupByKey();
  OutputReceiver[] outputReceivers = getOutputReceivers(network, node);

  Coder<?> inputCoder =
      CloudObjects.coderFromCloudObject(
          CloudObject.fromSpec(pgbkInstruction.getInputElementCodec()));
  if (!(inputCoder instanceof WindowedValueCoder)) {
    throw new IllegalArgumentException(
        String.format(
            "unexpected kind of input coder for PartialGroupByKeyOperation: %s", inputCoder));
  }

  Coder<?> valueCoder = ((WindowedValueCoder<?>) inputCoder).getValueCoder();
  if (!(valueCoder instanceof KvCoder)) {
    throw new IllegalArgumentException(
        String.format(
            "unexpected kind of input element coder for PartialGroupByKeyOperation: %s",
            valueCoder));
  }

  // The instanceof check above guarantees a KvCoder; the key type itself is taken on trust.
  @SuppressWarnings("unchecked")
  KvCoder<K, ?> kvCoder = (KvCoder<K, ?>) valueCoder;

  // The combining function is optional; leave the spec null when the instruction has none.
  CloudObject combineFnSpec = null;
  if (pgbkInstruction.getValueCombiningFn() != null) {
    combineFnSpec = CloudObject.fromSpec(pgbkInstruction.getValueCombiningFn());
  }

  ParDoFn parDoFn =
      PartialGroupByKeyParDoFns.create(
          options,
          kvCoder,
          combineFnSpec,
          pgbkInstruction.getSideInputs(),
          Arrays.<Receiver>asList(outputReceivers),
          executionContext,
          operationContext);
  return OperationNode.create(new ParDoOperation(parDoFn, outputReceivers, operationContext));
}
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.
From the class IntrinsicMapTaskExecutorTest, method testPerElementProcessingTimeCounters.
/**
 * Checks that the per-element-processing-time counter of a ParDo step records exactly one sample
 * for each element produced by the upstream read.
 */
@Test
public void testPerElementProcessingTimeCounters() throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
  // Per-element timing is only collected when this experiment is turned on.
  pipelineOptions
      .as(DataflowPipelineDebugOptions.class)
      .setExperiments(
          Lists.newArrayList(DataflowElementExecutionTracker.TIME_PER_ELEMENT_EXPERIMENT));

  DataflowExecutionStateTracker tracker =
      new DataflowExecutionStateTracker(
          ExecutionStateSampler.newForTest(),
          new TestDataflowExecutionState(
              NameContext.forStage("test-stage"),
              "other",
              null /* requestingStepName */,
              null /* sideInputIndex */,
              null /* metricsContainer */,
              NoopProfileScope.NOOP),
          counterSet,
          pipelineOptions,
          "test-work-item-id");
  NameContext parDoStepName = nameForStep("s1");

  // A three-element read feeds a no-op ParDo; the ParDo's counter must see all three elements.
  ReadOperation readOperation =
      ReadOperation.forTest(
          new TestReader("a", "b", "c"),
          new OutputReceiver(),
          TestOperationContext.create(counterSet, nameForStep("s0"), null, tracker));
  ParDoOperation parDoOperation =
      new ParDoOperation(
          new NoopParDoFn(),
          new OutputReceiver[] {},
          TestOperationContext.create(counterSet, parDoStepName, null, tracker));
  parDoOperation.attachInput(readOperation, 0);

  List<Operation> pipeline = Lists.newArrayList(readOperation, parDoOperation);
  try (IntrinsicMapTaskExecutor mapTaskExecutor =
      IntrinsicMapTaskExecutor.withSharedCounterSet(pipeline, counterSet, tracker)) {
    mapTaskExecutor.execute();
  }

  CounterName timingCounterName =
      CounterName.named("per-element-processing-time").withOriginalName(parDoStepName);
  Counter<Long, CounterDistribution> timingCounter =
      (Counter<Long, CounterDistribution>) counterSet.getExistingCounter(timingCounterName);
  assertThat(timingCounter.getAggregate().getCount(), equalTo(3L));
}
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver in project beam by apache.
From the class DefaultParDoFnFactoryTest, method testCreateSimpleParDoFn.
/**
 * Tests that a {@link SimpleParDoFn} is correctly dispatched to {@code UserParDoFnFactory} and
 * instantiated correctly.
 *
 * <p>A {@code TestDoFn} is serialized into a {@code DoFnInfo}, handed to the default factory as a
 * cloud object, and the resulting {@link ParDoFn} is inspected after processing one element to
 * verify that the deserialized function and its windowing strategy match what was passed in.
 */
@Test
public void testCreateSimpleParDoFn() throws Exception {
  // A serialized DoFn
  String stringFieldValue = "some state";
  long longFieldValue = 42L;
  TestDoFn fn = new TestDoFn(stringFieldValue, longFieldValue);
  String serializedFn =
      StringUtils.byteArrayToJsonString(
          SerializableUtils.serializeToByteArray(
              DoFnInfo.forFn(
                  fn,
                  WindowingStrategy.globalDefault(),
                  null /* side input views */,
                  null /* input coder */,
                  new TupleTag<>("output") /* main output */,
                  DoFnSchemaInformation.create(),
                  Collections.emptyMap())));
  CloudObject cloudUserFn = CloudObject.forClassName("DoFn");
  addString(cloudUserFn, "serialized_fn", serializedFn);

  // Create the ParDoFn from the serialized DoFn
  ParDoFn parDoFn =
      DEFAULT_FACTORY.create(
          DEFAULT_OPTIONS,
          cloudUserFn,
          null,
          MAIN_OUTPUT,
          ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0),
          DEFAULT_EXECUTION_CONTEXT,
          TestOperationContext.create(counterSet));

  // Test that the factory created the correct class
  assertThat(parDoFn, instanceOf(SimpleParDoFn.class));

  // TODO: move the asserts below into new tests in UserParDoFnFactoryTest, and this test should
  // simply assert that DefaultParDoFnFactory.create() matches UserParDoFnFactory.create()

  // Test that the DoFnInfo reflects the one passed in
  SimpleParDoFn simpleParDoFn = (SimpleParDoFn) parDoFn;
  parDoFn.startBundle(new OutputReceiver());
  // DoFnInfo may not yet be initialized until an element is processed
  parDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
  @SuppressWarnings("rawtypes")
  DoFnInfo doFnInfo = simpleParDoFn.getDoFnInfo();

  // Assert the type BEFORE casting: casting first would make the instanceOf assertion vacuous,
  // because a wrong type would surface as a ClassCastException rather than an assertion failure.
  DoFn<?, ?> innerDoFn = doFnInfo.getDoFn();
  assertThat(innerDoFn, instanceOf(TestDoFn.class));
  assertThat(doFnInfo.getWindowingStrategy().getWindowFn(), instanceOf(GlobalWindows.class));
  assertThat(doFnInfo.getWindowingStrategy().getTrigger(), instanceOf(DefaultTrigger.class));

  // Test that the deserialized user DoFn is as expected
  TestDoFn actualTestDoFn = (TestDoFn) innerDoFn;
  assertEquals(stringFieldValue, actualTestDoFn.stringField);
  assertEquals(longFieldValue, actualTestDoFn.longField);
}
Aggregations