use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class BeamFnMapTaskExecutorFactory method createParDoOperation.
/**
 * Builds the {@link OperationNode} that wraps a {@link ParDoOperation} for a ParDo instruction
 * node.
 *
 * <p>The main output tag is derived from the first entry of the instruction's multi-output infos.
 * Each outgoing edge of {@code node} contributes a mapping from its output tag to the position of
 * its successor in the iteration order of {@code network.successors(node)}.
 *
 * @param network graph containing {@code node} and its successors
 * @param node the ParDo instruction node to translate
 * @param options pipeline options forwarded to the ParDoFn factory
 * @param executionContext execution context forwarded to the ParDoFn factory
 * @param operationContext operation context shared by the ParDoFn and the resulting operation
 * @return an operation node holding the newly created {@link ParDoOperation}
 * @throws Exception declared by the signature; presumably raised by the ParDoFn factory — confirm
 */
private OperationNode createParDoOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
  ParDoInstruction parDoInstruction = node.getParallelInstruction().getParDo();
  TupleTag<?> mainTag = tupleTag(parDoInstruction.getMultiOutputInfos().get(0));

  // Every edge leaving this node towards the same successor shares that successor's index.
  ImmutableMap.Builder<TupleTag<?>, Integer> tagToReceiverIndex = ImmutableMap.builder();
  int receiverIndex = 0;
  for (Node successor : network.successors(node)) {
    for (Edge outgoing : network.edgesConnecting(node, successor)) {
      MultiOutputInfoEdge taggedEdge = (MultiOutputInfoEdge) outgoing;
      tagToReceiverIndex.put(tupleTag(taggedEdge.getMultiOutputInfo()), receiverIndex);
    }
    receiverIndex++;
  }

  CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
  ParDoFn parDoFn =
      parDoFnFactory.create(
          options,
          userFnSpec,
          parDoInstruction.getSideInputs(),
          mainTag,
          tagToReceiverIndex.build(),
          executionContext,
          operationContext);

  // Receivers are resolved only after the ParDoFn has been created, as in the original ordering.
  OutputReceiver[] outputReceivers = getOutputReceivers(network, node);
  return OperationNode.create(new ParDoOperation(parDoFn, outputReceivers, operationContext));
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class BatchDataflowWorker method doWork.
/**
 * Runs a single work item to completion and reports the outcome to the status client.
 *
 * <p>Any {@link Throwable} raised while preparing or executing the work is passed to {@code
 * workItemStatusClient.reportError} and converted into a {@code false} result. Whatever happens,
 * the executor (if one was created) is closed and the SDK worker harness (if non-null) is handed
 * back to the registry.
 *
 * @param workItem the work to perform; must carry either a map task or a source operation task
 * @param workItemStatusClient receives the worker and the success or error report
 * @return {@code true} if the work ran and success was reported, {@code false} if an error was
 *     reported instead
 * @throws IOException Only if the WorkUnitClient fails.
 */
@VisibleForTesting
boolean doWork(WorkItem workItem, WorkItemStatusClient workItemStatusClient) throws IOException {
  LOG.debug("Executing: {}", workItem);
  DataflowWorkExecutor executor = null;
  SdkWorkerHarness harness = sdkHarnessRegistry.getAvailableWorkerAndAssignWork();
  try {
    // Populate PipelineOptions with data from work unit.
    options.setProject(workItem.getProjectId());

    // The stage name lives on whichever task kind the work item carries.
    boolean hasMapTask = workItem.getMapTask() != null;
    if (!hasMapTask && workItem.getSourceOperationTask() == null) {
      throw new RuntimeException("Unknown kind of work item: " + workItem.toString());
    }
    final String stageName =
        hasMapTask
            ? workItem.getMapTask().getStageName()
            : workItem.getSourceOperationTask().getStageName();

    CounterSet counters = new CounterSet();
    BatchModeExecutionContext batchContext =
        BatchModeExecutionContext.create(
            counters,
            sideInputDataCache,
            sideInputWeakReferenceCache,
            readerRegistry,
            options,
            stageName,
            String.valueOf(workItem.getId()));

    if (workItem.getMapTask() != null) {
      MutableNetwork<Node, Edge> taskNetwork = mapTaskToNetwork.apply(workItem.getMapTask());
      if (LOG.isDebugEnabled()) {
        LOG.debug("Network as Graphviz .dot: {}", Networks.toDot(taskNetwork));
      }
      executor =
          mapTaskExecutorFactory.create(
              harness.getControlClientHandler(),
              harness.getGrpcDataFnServer(),
              sdkHarnessRegistry.beamFnDataApiServiceDescriptor(),
              harness.getGrpcStateFnServer(),
              taskNetwork,
              options,
              stageName,
              readerRegistry,
              sinkRegistry,
              batchContext,
              counters,
              idGenerator);
    } else if (workItem.getSourceOperationTask() != null) {
      executor =
          SourceOperationExecutorFactory.create(
              options, workItem.getSourceOperationTask(), counters, batchContext, stageName);
    } else {
      throw new IllegalStateException("Work Item was neither a MapTask nor a SourceOperation");
    }

    workItemStatusClient.setWorker(executor, batchContext);
    DataflowWorkProgressUpdater updater =
        new DataflowWorkProgressUpdater(workItemStatusClient, workItem, executor, options);
    executeWork(executor, updater);
    workItemStatusClient.reportSuccess();
    return true;
  } catch (Throwable failure) {
    workItemStatusClient.reportError(failure);
    return false;
  } finally {
    // Close the executor first, then release the harness back to the registry.
    if (executor != null) {
      try {
        executor.close();
      } catch (Exception closeFailure) {
        LOG.warn(
            "Uncaught exception while closing worker. All work has already committed or "
                + "been marked for retry.",
            closeFailure);
      }
    }
    if (harness != null) {
      sdkHarnessRegistry.completeWork(harness);
    }
  }
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class RemoveFlattenInstructionsFunctionTest method testFlattenMultiplePCollectionsHavingMultipleConsumers.
/**
 * Verifies flatten removal when one of the flattened PCollections ({@code B.out}) is also
 * consumed directly by another instruction ({@code D}).
 *
 * <p>All nodes, including {@code d} and {@code dPCollection}, are registered explicitly before any
 * edge is added, rather than relying on {@code addEdge} to add missing endpoints implicitly.
 */
@Test
public void testFlattenMultiplePCollectionsHavingMultipleConsumers() {
  Node a = ParallelInstructionNode.create(new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
  Node aPCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out"), PCOLLECTION_ID);
  Edge aOutput = DefaultEdge.create();
  Node b = ParallelInstructionNode.create(new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
  Edge bOutput = DefaultEdge.create();
  Node bPCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out"), PCOLLECTION_ID);
  Node flatten = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
  Node flattenPCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten.out"), PCOLLECTION_ID);
  Node c = ParallelInstructionNode.create(new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
  Edge cOutput = DefaultEdge.create();
  Node cPCollection = InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);
  Node d = ParallelInstructionNode.create(new ParallelInstruction().setName("D"), Nodes.ExecutionLocation.UNKNOWN);
  Edge dOutput = DefaultEdge.create();
  Node dPCollection = InstructionOutputNode.create(new InstructionOutput().setName("D.out"), PCOLLECTION_ID);
  // A --\
  // -> Flatten --> C
  // B --/-------------> D
  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(a);
  network.addNode(aPCollection);
  network.addNode(b);
  network.addNode(bPCollection);
  network.addNode(flatten);
  network.addNode(flattenPCollection);
  network.addNode(c);
  network.addNode(cPCollection);
  // Register D and its output explicitly, matching how every other node is set up. They are added
  // last so the order in which nodes first enter the network is the same as before.
  network.addNode(d);
  network.addNode(dPCollection);
  network.addEdge(a, aPCollection, aOutput);
  network.addEdge(aPCollection, flatten, DefaultEdge.create());
  network.addEdge(b, bPCollection, bOutput);
  network.addEdge(bPCollection, flatten, DefaultEdge.create());
  network.addEdge(bPCollection, d, DefaultEdge.create());
  network.addEdge(flatten, flattenPCollection, DefaultEdge.create());
  network.addEdge(flattenPCollection, c, DefaultEdge.create());
  network.addEdge(c, cPCollection, cOutput);
  network.addEdge(d, dPCollection, dOutput);
  // A --\
  // -> C
  // B --/-> D
  assertThatFlattenIsProperlyRemoved(network);
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class RemoveFlattenInstructionsFunctionTest method testRemoveFlattenOnMultiOutputInstruction.
/**
 * Verifies flatten removal when the flatten's inputs come from instructions with several tagged
 * outputs: {@code A} feeds the flatten via {@code out2} and {@code out3}, {@code B} via {@code
 * out2}, while each instruction's {@code out1} goes to an unrelated consumer.
 */
@Test
public void testRemoveFlattenOnMultiOutputInstruction() {
  Node a = ParallelInstructionNode.create(new ParallelInstruction().setName("A"), Nodes.ExecutionLocation.UNKNOWN);
  Node aOut1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out1"), PCOLLECTION_ID);
  Node aOut2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out2"), PCOLLECTION_ID);
  Node aOut3PCollection = InstructionOutputNode.create(new InstructionOutput().setName("A.out3"), PCOLLECTION_ID);
  Edge aOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
  Edge aOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
  Edge aOut3 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out3"));
  Edge aOut1PCollectionEdge = DefaultEdge.create();
  Node b = ParallelInstructionNode.create(new ParallelInstruction().setName("B"), Nodes.ExecutionLocation.UNKNOWN);
  Node bOut1PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out1"), PCOLLECTION_ID);
  // Named "B.out2" so the label matches the out2 edge it is attached to below; it previously
  // duplicated "B.out1". NOTE(review): assumes nothing keys off this display name — confirm.
  Node bOut2PCollection = InstructionOutputNode.create(new InstructionOutput().setName("B.out2"), PCOLLECTION_ID);
  Edge bOut1 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out1"));
  Edge bOut2 = MultiOutputInfoEdge.create(new MultiOutputInfo().setTag("out2"));
  Edge bOut1PCollectionEdge = DefaultEdge.create();
  Node flatten = ParallelInstructionNode.create(new ParallelInstruction().setName("Flatten").setFlatten(new FlattenInstruction()), Nodes.ExecutionLocation.UNKNOWN);
  Node flattenPCollection = InstructionOutputNode.create(new InstructionOutput().setName("Flatten.out"), PCOLLECTION_ID);
  Node c = ParallelInstructionNode.create(new ParallelInstruction().setName("C"), Nodes.ExecutionLocation.UNKNOWN);
  Edge cOutput = DefaultEdge.create();
  Node cPCollection = InstructionOutputNode.create(new InstructionOutput().setName("C.out"), PCOLLECTION_ID);
  Node d = ParallelInstructionNode.create(new ParallelInstruction().setName("D"), Nodes.ExecutionLocation.UNKNOWN);
  Edge dOutput = DefaultEdge.create();
  Node dPCollection = InstructionOutputNode.create(new InstructionOutput().setName("D.out"), PCOLLECTION_ID);
  Node e = ParallelInstructionNode.create(new ParallelInstruction().setName("E"), Nodes.ExecutionLocation.UNKNOWN);
  Edge eOutput = DefaultEdge.create();
  Node ePCollection = InstructionOutputNode.create(new InstructionOutput().setName("E.out"), PCOLLECTION_ID);
  // /-out1-> C
  // A -out2-\
  // \-out3--> Flatten --> D
  // B -out2-/
  // \-out1-> E
  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(a);
  network.addNode(aOut1PCollection);
  network.addNode(aOut2PCollection);
  network.addNode(aOut3PCollection);
  network.addNode(b);
  network.addNode(bOut1PCollection);
  network.addNode(bOut2PCollection);
  network.addNode(flatten);
  network.addNode(flattenPCollection);
  network.addNode(c);
  network.addNode(cPCollection);
  network.addNode(d);
  network.addNode(dPCollection);
  network.addNode(e);
  network.addNode(ePCollection);
  // Tagged edges leave the multi-output instructions; default edges connect everything else.
  network.addEdge(a, aOut1PCollection, aOut1);
  network.addEdge(a, aOut2PCollection, aOut2);
  network.addEdge(a, aOut3PCollection, aOut3);
  network.addEdge(aOut1PCollection, c, aOut1PCollectionEdge);
  network.addEdge(aOut2PCollection, flatten, DefaultEdge.create());
  network.addEdge(aOut3PCollection, flatten, DefaultEdge.create());
  network.addEdge(b, bOut1PCollection, bOut1);
  network.addEdge(b, bOut2PCollection, bOut2);
  network.addEdge(bOut1PCollection, e, bOut1PCollectionEdge);
  network.addEdge(bOut2PCollection, flatten, DefaultEdge.create());
  network.addEdge(flatten, flattenPCollection, DefaultEdge.create());
  network.addEdge(flattenPCollection, d, DefaultEdge.create());
  network.addEdge(c, cPCollection, cOutput);
  network.addEdge(d, dPCollection, dOutput);
  network.addEdge(e, ePCollection, eOutput);
  // /-out1-> C
  // A -out2-\
  // \-out3--> D
  // B -out2-/
  // \-out1-> E
  assertThatFlattenIsProperlyRemoved(network);
}
use of org.apache.beam.runners.dataflow.worker.graph.Edges.Edge in project beam by apache.
the class CreateRegisterFnOperationFunctionTest method testAllRunnerGraph.
/**
 * Applies the register-fn function to a copy of a Read -> ParDo graph whose instruction nodes are
 * both located in the runner harness, and expects the result to equal the input graph.
 */
@Test
public void testAllRunnerGraph() {
  Node read = createReadNode("Read", Nodes.ExecutionLocation.RUNNER_HARNESS);
  Edge readToOutput = DefaultEdge.create();
  Node readOutput = createInstructionOutputNode("Read.out");
  Edge outputToParDo = DefaultEdge.create();
  Node parDo = createParDoNode("ParDo", Nodes.ExecutionLocation.RUNNER_HARNESS);
  Edge parDoToOutput = DefaultEdge.create();
  Node parDoOutput = createInstructionOutputNode("ParDo.out");

  // Read -out-> ParDo
  MutableNetwork<Node, Edge> expectedNetwork = createEmptyNetwork();
  expectedNetwork.addNode(read);
  expectedNetwork.addNode(readOutput);
  expectedNetwork.addNode(parDo);
  expectedNetwork.addNode(parDoOutput);
  expectedNetwork.addEdge(read, readOutput, readToOutput);
  expectedNetwork.addEdge(readOutput, parDo, outputToParDo);
  expectedNetwork.addEdge(parDo, parDoOutput, parDoToOutput);

  // The function is given a copy so the expected network is not the instance passed to it.
  MutableNetwork<Node, Edge> appliedNetwork =
      createRegisterFnOperation.apply(Graphs.copyOf(expectedNetwork));

  assertNetworkMaintainsBipartiteStructure(appliedNetwork);
  String mismatchMessage =
      String.format("Expected network %s but got network %s", expectedNetwork, appliedNetwork);
  assertEquals(mismatchMessage, expectedNetwork, appliedNetwork);
}
Aggregations