use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class ReplacePgbkWithPrecombineFunctionTest method testPrecombinePgbkIsReplaced.
/**
 * Applies {@code ReplacePgbkWithPrecombineFunction} to the chain
 * {@code out1 --> precombine_pgbk --> out2} and checks that the node, edge and root-to-leaf path
 * counts are unchanged, and that the instruction node now carries a ParDo (rather than a partial
 * group-by-key) whose user fn is the value combining fn the pgbk node was created with.
 */
@Test
public void testPrecombinePgbkIsReplaced() throws Exception {
  // Network:
  // out1 --> precombine_pgbk --> out2
  Map<String, Object> valueCombiningFn = new HashMap<>();
  Node out1 = createInstructionOutputNode("out1");
  String pgbkName = "precombine_pgbk";
  Node precombinePgbk = createPrecombinePgbkNode(pgbkName, valueCombiningFn);
  Node out2 = createInstructionOutputNode("out2");

  MutableNetwork<Node, Edge> network = createEmptyNetwork();
  network.addNode(out1);
  network.addNode(precombinePgbk);
  network.addNode(out2);
  network.addEdge(out1, precombinePgbk, DefaultEdge.create());
  network.addEdge(precombinePgbk, out2, DefaultEdge.create());

  // Snapshot the input so it can be compared against the rewritten network.
  Network<Node, Edge> inputNetwork = ImmutableNetwork.copyOf(network);
  network = new ReplacePgbkWithPrecombineFunction().apply(network);

  // Assert that network has same structure (same number of nodes and paths).
  assertEquals(inputNetwork.nodes().size(), network.nodes().size());
  assertEquals(inputNetwork.edges().size(), network.edges().size());
  List<List<Node>> oldPaths = Networks.allPathsFromRootsToLeaves(inputNetwork);
  List<List<Node>> newPaths = Networks.allPathsFromRootsToLeaves(network);
  assertEquals(oldPaths.size(), newPaths.size());

  // Locate the first instruction node in the rewritten network.
  ParallelInstructionNode createdCombineNode = null;
  for (Node node : network.nodes()) {
    if (node instanceof ParallelInstructionNode) {
      createdCombineNode = (ParallelInstructionNode) node;
      break;
    }
  }
  // Without this check the assertions below would be skipped silently if the rewrite dropped
  // the instruction node altogether, and the test would pass vacuously.
  assertNotNull(
      "No ParallelInstructionNode found in the rewritten network.", createdCombineNode);

  // Assert that the pgbk node has been replaced.
  ParallelInstruction parallelInstruction = createdCombineNode.getParallelInstruction();
  assertEquals(pgbkName, parallelInstruction.getName());
  assertNull(parallelInstruction.getPartialGroupByKey());
  assertNotNull(parallelInstruction.getParDo());
  ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
  assertEquals(valueCombiningFn, parDoInstruction.getUserFn());
}
use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class CloneAmbiguousFlattensFunctionTest method testNonAmbiguousFlattens.
/**
 * Checks {@code CloneAmbiguousFlattensFunction} on a network that mixes one ambiguous flatten
 * with an SDK flatten and a runner flatten: afterwards no flatten may be ambiguous, the two
 * non-ambiguous flattens must be the very same node instances as before, and the number of
 * root-to-leaf paths must be unchanged.
 */
@Test
public void testNonAmbiguousFlattens() throws Exception {
  // Topology under test:
  //
  //   sdk2+out ----------------\
  //                             sdk_flatten+out --> sdk3+out
  //   sdk1+out ----\           /
  //                 ambig_flatten+out
  //   runner1+out -/           \-> runner2+out -\
  //                                              runner_flatten+out --> runner4+out
  //                             runner3+out ----/
  MutableNetwork<Node, Edge> graph = createEmptyNetwork();

  // SDK-side instructions and their outputs.
  Node sdkOne = createSdkNode("sdk1");
  Node sdkTwo = createSdkNode("sdk2");
  Node sdkThree = createSdkNode("sdk3");
  Node sdkOneOut = createPCollection("sdk1.out");
  Node sdkTwoOut = createPCollection("sdk2.out");
  Node sdkThreeOut = createPCollection("sdk3.out");

  // Runner-side instructions and their outputs.
  Node runnerOne = createRunnerNode("runner1");
  Node runnerTwo = createRunnerNode("runner2");
  Node runnerThree = createRunnerNode("runner3");
  Node runnerFour = createRunnerNode("runner4");
  Node runnerOneOut = createPCollection("runner1.out");
  Node runnerTwoOut = createPCollection("runner2.out");
  Node runnerThreeOut = createPCollection("runner3.out");
  Node runnerFourOut = createPCollection("runner4.out");

  // The three flattens, one per execution location, each with its output.
  Node ambigFlatten = createFlatten("ambiguous_flatten", ExecutionLocation.AMBIGUOUS);
  Node ambigFlattenOut = createPCollection("ambiguous_flatten.out");
  Node sdkSideFlatten = createFlatten("sdk_flatten", ExecutionLocation.SDK_HARNESS);
  Node sdkSideFlattenOut = createPCollection("sdk_flatten.out");
  Node runnerSideFlatten = createFlatten("runner_flatten", ExecutionLocation.RUNNER_HARNESS);
  Node runnerSideFlattenOut = createPCollection("runner_flatten.out");

  graph.addNode(sdkOne);
  graph.addNode(sdkTwo);
  graph.addNode(sdkThree);
  graph.addNode(sdkOneOut);
  graph.addNode(sdkTwoOut);
  graph.addNode(sdkThreeOut);
  graph.addNode(runnerOne);
  graph.addNode(runnerTwo);
  graph.addNode(runnerThree);
  graph.addNode(runnerFour);
  graph.addNode(runnerOneOut);
  graph.addNode(runnerTwoOut);
  graph.addNode(runnerThreeOut);
  graph.addNode(runnerFourOut);
  graph.addNode(ambigFlatten);
  graph.addNode(ambigFlattenOut);
  graph.addNode(sdkSideFlatten);
  graph.addNode(sdkSideFlattenOut);
  graph.addNode(runnerSideFlatten);
  graph.addNode(runnerSideFlattenOut);

  // Every instruction feeds its own output collection.
  graph.addEdge(sdkOne, sdkOneOut, DefaultEdge.create());
  graph.addEdge(sdkTwo, sdkTwoOut, DefaultEdge.create());
  graph.addEdge(sdkThree, sdkThreeOut, DefaultEdge.create());
  graph.addEdge(runnerOne, runnerOneOut, DefaultEdge.create());
  graph.addEdge(runnerTwo, runnerTwoOut, DefaultEdge.create());
  graph.addEdge(runnerThree, runnerThreeOut, DefaultEdge.create());
  graph.addEdge(runnerFour, runnerFourOut, DefaultEdge.create());
  graph.addEdge(ambigFlatten, ambigFlattenOut, DefaultEdge.create());
  graph.addEdge(sdkSideFlatten, sdkSideFlattenOut, DefaultEdge.create());
  graph.addEdge(runnerSideFlatten, runnerSideFlattenOut, DefaultEdge.create());

  // Wiring between outputs and downstream consumers, as drawn above.
  graph.addEdge(sdkOneOut, ambigFlatten, DefaultEdge.create());
  graph.addEdge(runnerOneOut, ambigFlatten, DefaultEdge.create());
  graph.addEdge(ambigFlattenOut, sdkSideFlatten, DefaultEdge.create());
  graph.addEdge(sdkTwoOut, sdkSideFlatten, DefaultEdge.create());
  graph.addEdge(sdkSideFlattenOut, sdkThree, DefaultEdge.create());
  graph.addEdge(ambigFlattenOut, runnerTwo, DefaultEdge.create());
  graph.addEdge(runnerTwoOut, runnerSideFlatten, DefaultEdge.create());
  graph.addEdge(runnerThreeOut, runnerSideFlatten, DefaultEdge.create());
  graph.addEdge(runnerSideFlattenOut, runnerFour, DefaultEdge.create());

  // Record the path count, run the function, then inspect every flatten that remains.
  List<List<Node>> pathsBefore = Networks.allPathsFromRootsToLeaves(graph);
  graph = new CloneAmbiguousFlattensFunction().apply(graph);

  for (Node candidate : graph.nodes()) {
    if (!(candidate instanceof ParallelInstructionNode)) {
      continue;
    }
    ParallelInstructionNode flattenNode = (ParallelInstructionNode) candidate;
    if (flattenNode.getParallelInstruction().getFlatten() == null) {
      // Not a flatten instruction; nothing to verify.
      continue;
    }

    assertTrue(
        "Ambiguous flatten not removed from network.",
        flattenNode.getExecutionLocation() != ExecutionLocation.AMBIGUOUS);

    // The pre-existing non-ambiguous flattens must survive as the identical instances.
    String flattenName = flattenNode.getParallelInstruction().getName();
    if ("sdk_flatten".equals(flattenName)) {
      assertSame("SDK flatten has been incorrectly modified.", sdkSideFlatten, flattenNode);
    } else if ("runner_flatten".equals(flattenName)) {
      assertSame(
          "Runner flatten has been incorrectly modified.", runnerSideFlatten, flattenNode);
    }
  }

  List<List<Node>> pathsAfter = Networks.allPathsFromRootsToLeaves(graph);
  assertEquals(pathsBefore.size(), pathsAfter.size());
}
use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class CloneAmbiguousFlattensFunctionTest method testSingleFlatten.
/**
 * Checks {@code CloneAmbiguousFlattensFunction} on a single ambiguous flatten: the flatten must
 * end up as one SDK flatten and one runner flatten, each keeping both predecessors, with the
 * successors split between them as asserted below, and the root-to-leaf path count unchanged.
 */
@Test
public void testSingleFlatten() throws Exception {
  // Before:
  //
  //   sdk_predecessor -----> out -\                           /-> sdk_successor --> out
  //                                ambiguous_flatten --> out ---> no_location_successor --> out
  //   runner_predecessor --> out -/                           \-> runner_successor --> out
  MutableNetwork<Node, Edge> graph = createEmptyNetwork();

  Node sdkUpstream = createSdkNode("sdk_predecessor");
  Node runnerUpstream = createRunnerNode("runner_predecessor");
  Node sdkUpstreamOut = createPCollection("sdk_predecessor.out");
  Node runnerUpstreamOut = createPCollection("runner_predecessor.out");
  Node ambigFlatten = createFlatten("ambiguous_flatten", ExecutionLocation.AMBIGUOUS);
  Node ambigFlattenOut = createPCollection("ambiguous_flatten.out");
  Node sdkDownstream = createSdkNode("sdk_successor");
  Node runnerDownstream = createRunnerNode("runner_successor");
  Node unlocatedDownstream = createNoLocationNode();
  Node sdkDownstreamOut = createPCollection("sdk_successor.out");
  Node runnerDownstreamOut = createPCollection("runner_successor.out");
  Node unlocatedDownstreamOut = createPCollection("no_location_successor.out");

  graph.addNode(sdkUpstream);
  graph.addNode(runnerUpstream);
  graph.addNode(sdkUpstreamOut);
  graph.addNode(runnerUpstreamOut);
  graph.addNode(ambigFlatten);
  graph.addNode(ambigFlattenOut);
  graph.addNode(sdkDownstream);
  graph.addNode(runnerDownstream);
  graph.addNode(unlocatedDownstream);
  graph.addNode(sdkDownstreamOut);
  graph.addNode(runnerDownstreamOut);
  graph.addNode(unlocatedDownstreamOut);

  graph.addEdge(sdkUpstream, sdkUpstreamOut, DefaultEdge.create());
  graph.addEdge(runnerUpstream, runnerUpstreamOut, DefaultEdge.create());
  graph.addEdge(sdkUpstreamOut, ambigFlatten, DefaultEdge.create());
  graph.addEdge(runnerUpstreamOut, ambigFlatten, DefaultEdge.create());
  graph.addEdge(ambigFlatten, ambigFlattenOut, DefaultEdge.create());
  graph.addEdge(ambigFlattenOut, sdkDownstream, DefaultEdge.create());
  graph.addEdge(ambigFlattenOut, runnerDownstream, DefaultEdge.create());
  graph.addEdge(ambigFlattenOut, unlocatedDownstream, DefaultEdge.create());
  graph.addEdge(sdkDownstream, sdkDownstreamOut, DefaultEdge.create());
  graph.addEdge(runnerDownstream, runnerDownstreamOut, DefaultEdge.create());
  graph.addEdge(unlocatedDownstream, unlocatedDownstreamOut, DefaultEdge.create());

  // Expected after:
  //
  //   SdkPredecessor -----> out --> SdkFlatten -----> out --> SdkSuccessor --> out
  //                              X
  //   RunnerPredecessor --> out --> RunnerFlatten --> out --> RunnerSuccessor --> out
  //                                                      \-> NoLocationSuccessor --> out
  List<List<Node>> pathsBefore = Networks.allPathsFromRootsToLeaves(graph);
  graph = new CloneAmbiguousFlattensFunction().apply(graph);

  // Pick out the SDK and runner flattens that the function produced.
  ParallelInstructionNode sdkClone = null;
  ParallelInstructionNode runnerClone = null;
  for (Node candidate : graph.nodes()) {
    if (!(candidate instanceof ParallelInstructionNode)) {
      continue;
    }
    ParallelInstructionNode flattenNode = (ParallelInstructionNode) candidate;
    if (flattenNode.getParallelInstruction().getFlatten() == null) {
      // Not a flatten instruction.
      continue;
    }

    ExecutionLocation location = flattenNode.getExecutionLocation();
    if (location == ExecutionLocation.SDK_HARNESS) {
      sdkClone = flattenNode;
    } else if (location == ExecutionLocation.RUNNER_HARNESS) {
      runnerClone = flattenNode;
    } else {
      assertTrue(
          "Ambiguous flatten not removed from network.",
          flattenNode.getExecutionLocation() != ExecutionLocation.AMBIGUOUS);
    }
  }
  assertNotNull("Ambiguous flatten was not cloned into sdk flatten.", sdkClone);
  assertNotNull("Ambiguous flatten was not cloned into runner flatten.", runnerClone);

  // Each clone has exactly one output node.
  Node sdkCloneOut = Iterables.getOnlyElement(graph.successors(sdkClone));
  Node runnerCloneOut = Iterables.getOnlyElement(graph.successors(runnerClone));

  // Both clones keep both predecessors; the successors are split between the two outputs.
  assertEquals(2, graph.predecessors(sdkClone).size());
  assertEquals(2, graph.predecessors(runnerClone).size());
  assertEquals(1, graph.successors(sdkCloneOut).size());
  assertEquals(2, graph.successors(runnerCloneOut).size());
  assertSame(sdkDownstream, Iterables.getOnlyElement(graph.successors(sdkCloneOut)));
  assertThat(
      graph.successors(runnerCloneOut), hasItems(runnerDownstream, unlocatedDownstream));

  List<List<Node>> pathsAfter = Networks.allPathsFromRootsToLeaves(graph);
  assertEquals(pathsBefore.size(), pathsAfter.size());
}
use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class LengthPrefixUnknownCodersTest method testLengthPrefixAndReplaceForRunnerNetwork.
/**
 * Runs {@code andReplaceForRunnerNetwork} over a two-node network (a read instruction and its
 * output, both using {@code windowedValueCoder}) and checks that the instructions and outputs in
 * the result match, in any order, a read node and output node built with
 * {@code prefixedAndReplacedWindowedValueCoder}.
 */
@Test
public void testLengthPrefixAndReplaceForRunnerNetwork() throws Exception {
  // Input network: Read --> Read.out
  Node source = createReadNode("Read", "Source", windowedValueCoder);
  Edge sourceToOutput = DefaultEdge.create();
  Node sourceOut = createInstructionOutputNode("Read.out", windowedValueCoder);

  MutableNetwork<Node, Edge> inputGraph = createEmptyNetwork();
  inputGraph.addNode(source);
  inputGraph.addNode(sourceOut);
  inputGraph.addEdge(source, sourceOut, sourceToOutput);

  // Reference nodes describing what the rewritten network is expected to contain.
  ParallelInstructionNode expectedSource =
      createReadNode("Read", "Source", prefixedAndReplacedWindowedValueCoder);
  InstructionOutputNode expectedSourceOut =
      createInstructionOutputNode("Read.out", prefixedAndReplacedWindowedValueCoder);

  MutableNetwork<Node, Edge> rewrittenGraph = andReplaceForRunnerNetwork(inputGraph);

  // Collect the underlying payload of every instruction and output node in the result.
  ImmutableSet.Builder<GenericJson> collected = ImmutableSet.builder();
  for (Node candidate : rewrittenGraph.nodes()) {
    if (candidate instanceof ParallelInstructionNode) {
      collected.add(((ParallelInstructionNode) candidate).getParallelInstruction());
      continue;
    }
    if (candidate instanceof InstructionOutputNode) {
      collected.add(((InstructionOutputNode) candidate).getInstructionOutput());
    }
  }

  ImmutableSet<GenericJson> actualPayloads = collected.build();
  assertThat(
      actualPayloads,
      containsInAnyOrder(
          jsonOf(expectedSourceOut.getInstructionOutput()),
          jsonOf(expectedSource.getParallelInstruction())));
}
use of org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode in project beam by apache.
the class LengthPrefixUnknownCodersTest method testLengthPrefixForInstructionOutputNodeWithNonGrpcNodeNeighbor.
/**
 * Connects a read instruction node to the shared {@code instructionOutputNode} and checks that
 * the codec of the node returned by {@code forInstructionOutputNode} equals, as JSON, the cloud
 * object form of the plain {@code windowedValueCoder}.
 */
@Test
public void testLengthPrefixForInstructionOutputNodeWithNonGrpcNodeNeighbor() {
  // Network: read --> instructionOutputNode
  MutableNetwork<Node, Edge> graph = createEmptyNetwork();
  ParallelInstructionNode reader = createReadNode("read", "source", windowedValueCoder);
  graph.addNode(instructionOutputNode);
  graph.addNode(reader);
  Edge readerToOutput = DefaultEdge.create();
  graph.addEdge(reader, instructionOutputNode, readerToOutput);

  assertEqualsAsJson(
      // Expected: the unmodified coder.
      CloudObjects.asCloudObject(windowedValueCoder, /*sdkComponents=*/ null),
      // Actual: the codec on whatever node the function returns for the output node.
      ((InstructionOutputNode) forInstructionOutputNode(graph).apply(instructionOutputNode))
          .getInstructionOutput()
          .getCodec());
}
Aggregations