use of com.google.api.services.dataflow.model.InstructionInput in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method createWriteInstruction.
/**
 * Builds a {@link ParallelInstruction} that wraps a {@link WriteInstruction}.
 *
 * <p>The write consumes output {@code producerOutputNum} of the instruction at
 * {@code producerIndex}. Its sink spec is derived from {@code TestSinkFactory} and its codec is
 * the test class's {@code windowedStringCoder}.
 *
 * @param producerIndex index of the instruction producing this write's input
 * @param producerOutputNum output number of the producer that is consumed
 * @param systemName system name of the instruction; the original name is this value with
 *     {@code "OriginalName"} appended
 * @return the assembled write instruction
 */
static ParallelInstruction createWriteInstruction(int producerIndex, int producerOutputNum, String systemName) {
  // Describe the sink first: spec from the test sink factory, codec from the test class.
  com.google.api.services.dataflow.model.Sink sink = new com.google.api.services.dataflow.model.Sink();
  sink.setSpec(CloudObject.forClass(IntrinsicMapTaskExecutorFactoryTest.TestSinkFactory.class));
  sink.setCodec(windowedStringCoder);

  // Point the write at the producer's output.
  InstructionInput producerRef = new InstructionInput();
  producerRef.setProducerInstructionIndex(producerIndex);
  producerRef.setOutputNum(producerOutputNum);

  WriteInstruction write = new WriteInstruction();
  write.setInput(producerRef);
  write.setSink(sink);

  ParallelInstruction result = new ParallelInstruction();
  result.setWrite(write);
  result.setSystemName(systemName);
  result.setOriginalName(systemName + "OriginalName");
  return result;
}
use of com.google.api.services.dataflow.model.InstructionInput in project beam by apache.
the class MapTaskToNetworkFunctionTest method createInstructionInput.
/**
 * Creates an {@link InstructionInput} referring to one output of a producer instruction.
 *
 * @param instructionIndex index of the producing instruction
 * @param outputNum output number of that instruction
 * @return a new input with both fields set
 */
private static InstructionInput createInstructionInput(int instructionIndex, int outputNum) {
  // The generated model's setters return the instance, so the two assignments can be chained.
  return new InstructionInput()
      .setProducerInstructionIndex(instructionIndex)
      .setOutputNum(outputNum);
}
use of com.google.api.services.dataflow.model.InstructionInput in project beam by apache.
the class MapTaskToNetworkFunction method apply.
/**
 * Converts a {@link MapTask} into a directed network of instruction nodes and output nodes.
 *
 * <p>For every instruction in the map task a {@link ParallelInstructionNode} is added (built
 * from a clone of the instruction whose outputs have been set to {@code null}), followed by one
 * {@link InstructionOutputNode} per declared output. Each instruction node is connected to its
 * output nodes, and afterwards each instruction node is connected to the output nodes it
 * consumes via {@code attachInput}.
 *
 * <p>The network disallows self loops and allows parallel edges.
 *
 * @param mapTask the map task to convert; a {@code null} instruction list is treated as empty
 * @return the populated network
 * @throws IllegalArgumentException if an instruction is not a flatten, ParDo, partial group by
 *     key, read, or write
 */
@Override
public MutableNetwork<Node, Edge> apply(MapTask mapTask) {
  List<ParallelInstruction> parallelInstructions = Apiary.listOrEmpty(mapTask.getInstructions());
  int instructionCount = parallelInstructions.size();
  MutableNetwork<Node, Edge> network =
      NetworkBuilder.directed()
          .allowsSelfLoops(false)
          .allowsParallelEdges(true)
          .expectedNodeCount(instructionCount * 2)
          .build();

  // The JSON factory depends only on the map task, so resolve it once instead of once per
  // instruction.
  JsonFactory factory = MoreObjects.firstNonNull(mapTask.getFactory(), Transport.getJsonFactory());

  // Add all the instruction nodes and output nodes
  ParallelInstructionNode[] instructionNodes = new ParallelInstructionNode[instructionCount];
  InstructionOutputNode[][] outputNodes = new InstructionOutputNode[instructionCount][];
  for (int i = 0; i < instructionCount; ++i) {
    ParallelInstruction sourceInstruction = parallelInstructions.get(i);
    List<InstructionOutput> outputs = Apiary.listOrEmpty(sourceInstruction.getOutputs());
    outputNodes[i] = new InstructionOutputNode[outputs.size()];

    // InstructionOutputNode's are the source of truth on instruction outputs.
    // Clear the (cloned) instruction's outputs to reduce chance for confusion.
    ParallelInstruction parallelInstruction = clone(factory, sourceInstruction).setOutputs(null);
    ParallelInstructionNode instructionNode =
        ParallelInstructionNode.create(parallelInstruction, Nodes.ExecutionLocation.UNKNOWN);
    instructionNodes[i] = instructionNode;
    network.addNode(instructionNode);

    // Connect the instruction node output to the output PCollection node
    for (int j = 0; j < outputs.size(); ++j) {
      InstructionOutput instructionOutput = outputs.get(j);
      InstructionOutputNode outputNode =
          InstructionOutputNode.create(
              instructionOutput, "generatedPcollection" + this.idGenerator.getId());
      network.addNode(outputNode);
      if (parallelInstruction.getParDo() != null) {
        // NOTE(review): assumes a ParDo carries one MultiOutputInfo per output; the lookup
        // below fails if the list is null or shorter than the outputs - confirm with callers.
        network.addEdge(
            instructionNode,
            outputNode,
            MultiOutputInfoEdge.create(parallelInstruction.getParDo().getMultiOutputInfos().get(j)));
      } else {
        network.addEdge(instructionNode, outputNode, DefaultEdge.create());
      }
      outputNodes[i][j] = outputNode;
    }
  }

  // Connect PCollections as inputs to instructions
  for (ParallelInstructionNode instructionNode : instructionNodes) {
    ParallelInstruction parallelInstruction = instructionNode.getParallelInstruction();
    if (parallelInstruction.getFlatten() != null) {
      for (InstructionInput input : Apiary.listOrEmpty(parallelInstruction.getFlatten().getInputs())) {
        attachInput(input, network, instructionNode, outputNodes);
      }
    } else if (parallelInstruction.getParDo() != null) {
      attachInput(parallelInstruction.getParDo().getInput(), network, instructionNode, outputNodes);
    } else if (parallelInstruction.getPartialGroupByKey() != null) {
      attachInput(parallelInstruction.getPartialGroupByKey().getInput(), network, instructionNode, outputNodes);
    } else if (parallelInstruction.getRead() != null) {
      // Reads have no inputs so nothing to do
    } else if (parallelInstruction.getWrite() != null) {
      attachInput(parallelInstruction.getWrite().getInput(), network, instructionNode, outputNodes);
    } else {
      throw new IllegalArgumentException(String.format("Unknown type of instruction %s for map task %s", parallelInstruction, mapTask));
    }
  }
  return network;
}
use of com.google.api.services.dataflow.model.InstructionInput in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method createPartialGroupByKeyInstruction.
/**
 * Builds a {@link ParallelInstruction} that wraps a {@link PartialGroupByKeyInstruction}.
 *
 * <p>The input element codec is a full windowed-value coder over {@code KV<String, Integer>}
 * with interval windows. The single output is named {@code "pgbk_output_name"} and is encoded
 * as {@code KV<String, Iterable<Integer>>}.
 *
 * @param producerIndex index of the instruction producing the input
 * @param producerOutputNum output number of the producer that is consumed
 * @return the assembled partial group by key instruction
 */
static ParallelInstruction createPartialGroupByKeyInstruction(int producerIndex, int producerOutputNum) {
  InstructionInput producerRef = new InstructionInput();
  producerRef.setProducerInstructionIndex(producerIndex);
  producerRef.setOutputNum(producerOutputNum);

  // Codec of the elements flowing into the partial group by key.
  CloudObject inputElementCodec =
      CloudObjects.asCloudObject(
          FullWindowedValueCoder.of(
              KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()),
              IntervalWindowCoder.of()),
          /*sdkComponents=*/ null);
  PartialGroupByKeyInstruction pgbk = new PartialGroupByKeyInstruction();
  pgbk.setInput(producerRef);
  pgbk.setInputElementCodec(inputElementCodec);

  // Codec of the grouped elements emitted on the single output.
  CloudObject outputCodec =
      CloudObjects.asCloudObject(
          KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(BigEndianIntegerCoder.of())),
          /*sdkComponents=*/ null);
  InstructionOutput groupedOutput = new InstructionOutput();
  groupedOutput.setName("pgbk_output_name");
  groupedOutput.setCodec(outputCodec);
  groupedOutput.setOriginalName("originalName");
  groupedOutput.setSystemName("systemName");

  ParallelInstruction result = new ParallelInstruction();
  result.setPartialGroupByKey(pgbk);
  result.setOutputs(Arrays.asList(groupedOutput));
  result.setOriginalName("pgbk_original_name");
  result.setSystemName("pgbk_system_name");
  return result;
}
use of com.google.api.services.dataflow.model.InstructionInput in project beam by apache.
the class IntrinsicMapTaskExecutorFactoryTest method createFlattenInstruction.
/**
 * Builds a {@link ParallelInstruction} that wraps a two-input {@link FlattenInstruction}.
 *
 * <p>The single output is named {@code "flatten_output_name"} and uses a UTF-8 string codec.
 *
 * @param producerIndex1 index of the instruction producing the first input
 * @param producerOutputNum1 output number consumed from the first producer
 * @param producerIndex2 index of the instruction producing the second input
 * @param producerOutputNum2 output number consumed from the second producer
 * @param systemName system name of the instruction; the original name is this value with
 *     {@code "OriginalName"} appended
 * @return the assembled flatten instruction
 */
static ParallelInstruction createFlattenInstruction(int producerIndex1, int producerOutputNum1, int producerIndex2, int producerOutputNum2, String systemName) {
  InstructionInput firstInput = new InstructionInput();
  firstInput.setProducerInstructionIndex(producerIndex1);
  firstInput.setOutputNum(producerOutputNum1);

  InstructionInput secondInput = new InstructionInput();
  secondInput.setProducerInstructionIndex(producerIndex2);
  secondInput.setOutputNum(producerOutputNum2);

  // Input order matters: first producer, then second.
  List<InstructionInput> flattenInputs = new ArrayList<>();
  flattenInputs.add(firstInput);
  flattenInputs.add(secondInput);

  FlattenInstruction flatten = new FlattenInstruction();
  flatten.setInputs(flattenInputs);

  CloudObject outputCodec = CloudObjects.asCloudObject(StringUtf8Coder.of(), /*sdkComponents=*/ null);
  InstructionOutput flattenedOutput = new InstructionOutput();
  flattenedOutput.setName("flatten_output_name");
  flattenedOutput.setCodec(outputCodec);
  flattenedOutput.setOriginalName("originalName");
  flattenedOutput.setSystemName("systemName");

  ParallelInstruction result = new ParallelInstruction();
  result.setFlatten(flatten);
  result.setOutputs(Arrays.asList(flattenedOutput));
  result.setSystemName(systemName);
  result.setOriginalName(systemName + "OriginalName");
  return result;
}
Aggregations