use of org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode in project beam by apache.
the class GreedyStageFuserTest method executableStageProducingSideInputMaterializesIt.
@Test
public void executableStageProducingSideInputMaterializesIt() {
  // impulse -- ParDo(createSide)
  //         \_ ParDo(processMain) with side input from createSide
  // The ExecutableStage executing createSide must have an output.
  Environment env = Environments.createDockerEnvironment("common");
  PTransform impulse =
      PTransform.newBuilder()
          .setUniqueName("impulse")
          .putOutputs("output", "impulsePC")
          .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
          .build();
  PTransform createSide =
      PTransform.newBuilder()
          .setUniqueName("createSide")
          .putInputs("input", "impulsePC")
          .putOutputs("output", "sidePC")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  PTransform processMain =
      PTransform.newBuilder()
          .setUniqueName("processMain")
          .putInputs("main", "impulsePC")
          .putInputs("side", "sidePC")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .putSideInputs("side", SideInput.getDefaultInstance())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  PCollection sidePC = PCollection.newBuilder().setUniqueName("sidePC").build();
  PCollection impulsePC = PCollection.newBuilder().setUniqueName("impulsePC").build();
  QueryablePipeline p =
      QueryablePipeline.forPrimitivesIn(
          partialComponents
              .toBuilder()
              .putTransforms("impulse", impulse)
              .putTransforms("createSide", createSide)
              .putTransforms("processMain", processMain)
              .putPcollections("impulsePC", impulsePC)
              .putPcollections("sidePC", sidePC)
              .putEnvironments("common", env)
              .build());
  PCollectionNode impulseOutput =
      getOnlyElement(p.getOutputPCollections(PipelineNode.pTransform("impulse", impulse)));
  ExecutableStage subgraph =
      GreedyStageFuser.forGrpcPortRead(
          p, impulseOutput, ImmutableSet.of(PipelineNode.pTransform("createSide", createSide)));
  assertThat(
      subgraph.getOutputPCollections(), contains(PipelineNode.pCollection("sidePC", sidePC)));
}
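The GreedyStageFuserTest snippets above and below rely on fixture fields (partialComponents here, plus impulseOutputNode in the next snippet) that are declared elsewhere in the test class. A minimal sketch of what such fields could look like, assuming the same RunnerApi and PipelineNode imports as the snippets; the exact definitions in the Beam source may differ:

// Hypothetical reconstruction of the test fixture, not the verbatim Beam source.
// The base Components only needs to register the impulse primitive and its
// output PCollection; each test extends it via partialComponents.toBuilder().
private final PCollection impulseDotOut =
    PCollection.newBuilder().setUniqueName("impulse.out").build();

private final Components partialComponents =
    Components.newBuilder()
        .putTransforms(
            "impulse",
            PTransform.newBuilder()
                .setUniqueName("impulse")
                .putOutputs("output", "impulse.out")
                .setSpec(
                    FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
                .build())
        .putPcollections("impulse.out", impulseDotOut)
        .build();

// Used as the stage's input PCollection by materializesWithGroupByKeyConsumer below.
private final PCollectionNode impulseOutputNode =
    PipelineNode.pCollection("impulse.out", impulseDotOut);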
use of org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode in project beam by apache.
the class GreedyStageFuserTest method materializesWithGroupByKeyConsumer.
@Test
public void materializesWithGroupByKeyConsumer() {
  // (impulse.out) -> read -> read.out -> gbk -> gbk.out
  // Fuses to
  // (impulse.out) -> read -> (read.out)
  // GBK is the responsibility of the runner, so it is not included in a stage.
  Environment env = Environments.createDockerEnvironment("common");
  PTransform readTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "read.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  QueryablePipeline p =
      QueryablePipeline.forPrimitivesIn(
          partialComponents
              .toBuilder()
              .putTransforms("read", readTransform)
              .putPcollections(
                  "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
              .putTransforms(
                  "gbk",
                  PTransform.newBuilder()
                      .putInputs("input", "read.out")
                      .putOutputs("output", "gbk.out")
                      .setSpec(
                          FunctionSpec.newBuilder()
                              .setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN))
                      .build())
              .putPcollections(
                  "gbk.out", PCollection.newBuilder().setUniqueName("gbk.out").build())
              .putEnvironments("common", env)
              .build());
  PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
  PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));
  ExecutableStage subgraph =
      GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(readNode));
  assertThat(subgraph.getOutputPCollections(), contains(readOutput));
  assertThat(subgraph, hasSubtransforms(readNode.getId()));
}
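The comment in the test states that the GroupByKey stays with the runner rather than being fused into the stage. A hedged follow-up assertion that would make this explicit (not part of the original test; it assumes Hamcrest's not()/equalTo() are statically imported alongside assertThat):

// Hypothetical extra check: no transform in the fused stage carries the GBK id.
for (PTransformNode fused : subgraph.getTransforms()) {
  assertThat(fused.getId(), not(equalTo("gbk")));
}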
use of org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode in project beam by apache.
the class ProcessBundleDescriptors method addSideInputs.
private static Map<String, Map<String, SideInputSpec>> addSideInputs(
    ExecutableStage stage, Components.Builder components) throws IOException {
  ImmutableTable.Builder<String, String, SideInputSpec> idsToSpec = ImmutableTable.builder();
  for (SideInputReference sideInputReference : stage.getSideInputs()) {
    // Update the coder specification for side inputs to be length prefixed so that the
    // SDK and Runner agree on how to encode/decode the key, window, and values for
    // side inputs.
    PCollectionNode pcNode = sideInputReference.collection();
    PCollection pc = pcNode.getPCollection();
    String lengthPrefixedCoderId =
        LengthPrefixUnknownCoders.addLengthPrefixedCoder(pc.getCoderId(), components, false);
    components.putPcollections(
        pcNode.getId(), pc.toBuilder().setCoderId(lengthPrefixedCoderId).build());
    FullWindowedValueCoder<KV<?, ?>> coder =
        (FullWindowedValueCoder) WireCoders.instantiateRunnerWireCoder(pcNode, components.build());
    idsToSpec.put(
        sideInputReference.transform().getId(),
        sideInputReference.localName(),
        SideInputSpec.of(
            sideInputReference.transform().getId(),
            sideInputReference.localName(),
            getAccessPattern(sideInputReference),
            coder.getValueCoder(),
            coder.getWindowCoder()));
  }
  return idsToSpec.build().rowMap();
}
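The ImmutableTable is keyed by (consuming transform id, side input local name), so the returned row map is looked up in two steps. A hedged sketch of that shape, purely illustrative since addSideInputs is private to ProcessBundleDescriptors; the two string ids are placeholders, not values from the Beam codebase:

// Hypothetical lookup of a single SideInputSpec from the returned nested map.
Map<String, Map<String, SideInputSpec>> sideInputSpecs = addSideInputs(stage, components);
SideInputSpec spec = sideInputSpecs.get("pardoTransformId").get("sideInputLocalName");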
use of org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode in project beam by apache.
the class ProcessBundleDescriptors method addStageOutputs.
private static Map<String, Coder<WindowedValue<?>>> addStageOutputs(
    ApiServiceDescriptor dataEndpoint,
    Collection<PCollectionNode> outputPCollections,
    Components.Builder components,
    Collection<WireCoderSetting> wireCoderSettings)
    throws IOException {
  Map<String, Coder<WindowedValue<?>>> remoteOutputCoders = new LinkedHashMap<>();
  for (PCollectionNode outputPCollection : outputPCollections) {
    WireCoderSetting wireCoderSetting =
        wireCoderSettings.stream()
            .filter(ws -> ws.getInputOrOutputId().equals(outputPCollection.getId()))
            .findAny()
            .orElse(WireCoderSetting.getDefaultInstance());
    OutputEncoding outputEncoding =
        addStageOutput(dataEndpoint, components, outputPCollection, wireCoderSetting);
    remoteOutputCoders.put(outputEncoding.getPTransformId(), outputEncoding.getCoder());
  }
  return remoteOutputCoders;
}
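The returned map associates the id of each synthetic output transform with the coder a runner would use to decode elements it receives for that output over the data plane. A hedged sketch of consuming that map, assuming the same dataEndpoint, outputPCollections, components, and wireCoderSettings arguments as the method above (addStageOutputs is private, so this only illustrates the shape of the result):

// Hypothetical consumption of the result; not Beam code.
Map<String, Coder<WindowedValue<?>>> remoteOutputCoders =
    addStageOutputs(dataEndpoint, outputPCollections, components, wireCoderSettings);
for (Map.Entry<String, Coder<WindowedValue<?>>> entry : remoteOutputCoders.entrySet()) {
  String outputTransformId = entry.getKey(); // id of the synthetic remote-write transform
  Coder<WindowedValue<?>> coder = entry.getValue();
  // e.g. WindowedValue<?> element = CoderUtils.decodeFromByteArray(coder, receivedBytes);
}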
use of org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode in project beam by apache.
the class BatchSideInputHandlerFactory method forIterableSideInput.
@Override
public <V, W extends BoundedWindow> IterableSideInputHandler<V, W> forIterableSideInput(
    String transformId, String sideInputId, Coder<V> elementCoder, Coder<W> windowCoder) {
  PCollectionNode collectionNode =
      sideInputToCollection.get(
          SideInputId.newBuilder().setTransformId(transformId).setLocalName(sideInputId).build());
  checkArgument(collectionNode != null, "No side input for %s/%s", transformId, sideInputId);
  ImmutableMultimap.Builder<Object, V> windowToValuesBuilder = ImmutableMultimap.builder();
  List<WindowedValue<V>> broadcastVariable = sideInputGetter.getSideInput(collectionNode.getId());
  for (WindowedValue<V> windowedValue : broadcastVariable) {
    for (BoundedWindow boundedWindow : windowedValue.getWindows()) {
      @SuppressWarnings("unchecked")
      W window = (W) boundedWindow;
      windowToValuesBuilder.put(windowCoder.structuralValue(window), windowedValue.getValue());
    }
  }
  ImmutableMultimap<Object, V> windowToValues = windowToValuesBuilder.build();
  return new IterableSideInputHandler<V, W>() {
    @Override
    public Iterable<V> get(W window) {
      return windowToValues.get(windowCoder.structuralValue(window));
    }

    @Override
    public Coder<V> elementCoder() {
      return elementCoder;
    }
  };
}
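A hedged usage sketch of the handler this factory produces, matching the signature shown above: handlerFactory stands in for a BatchSideInputHandlerFactory instance, the two string ids are placeholders, and the string/global-window coders are illustrative assumptions rather than values from the Beam codebase.

// Hypothetical usage of an IterableSideInputHandler obtained from the factory.
IterableSideInputHandler<String, GlobalWindow> handler =
    handlerFactory.forIterableSideInput(
        "pardoTransformId",
        "sideInputLocalName",
        StringUtf8Coder.of(),
        GlobalWindow.Coder.INSTANCE);
// Iterate the side input's values for the window of interest.
for (String value : handler.get(GlobalWindow.INSTANCE)) {
  System.out.println(value);
}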