use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class ParDoTranslation method translateParDo.
/**
 * Translates an applied {@link ParDo.MultiOutput} into a {@link ParDoPayload}.
 *
 * <p>The main input is identified as the single input of the applied transform whose tag id is
 * not the tag id of one of the ParDo's side inputs; the translation is then delegated to the
 * overload that takes the main input explicitly.
 *
 * @param appliedPTransform the application of the ParDo within its pipeline
 * @param components the components passed through to the delegate overload
 * @return the payload produced by the delegate overload
 * @throws IOException if the delegate overload fails with an {@link IOException}
 */
public static ParDoPayload translateParDo(
    AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components)
    throws IOException {
  final ParDo.MultiOutput<?, ?> parDo = appliedPTransform.getTransform();
  final DoFn<?, ?> doFn = parDo.getFn();
  final Pipeline pipeline = appliedPTransform.getPipeline();

  // Tag ids of the side inputs declared on the ParDo itself.
  Set<String> sideInputTagIds =
      parDo.getSideInputs().values().stream()
          .map(view -> view.getTagInternal().getId())
          .collect(Collectors.toSet());
  // Tag ids of every input (main and side) of the applied transform.
  Set<String> inputTagIds =
      appliedPTransform.getInputs().keySet().stream()
          .map(tag -> tag.getId())
          .collect(Collectors.toSet());

  // Whatever is left once the side inputs are removed must be exactly one tag: the main input.
  String mainInputTagId =
      Iterables.getOnlyElement(Sets.difference(inputTagIds, sideInputTagIds));
  PCollection<?> mainInput =
      (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputTagId));

  final DoFnSchemaInformation doFnSchemaInformation =
      ParDo.getDoFnSchemaInformation(doFn, mainInput);
  // The raw cast lets the wildcard-typed ParDo bind to the generic overload's InputT.
  return translateParDo(
      (ParDo.MultiOutput) parDo, mainInput, doFnSchemaInformation, pipeline, components);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class ParDoTranslation method translateParDo.
/**
 * Translates a {@link ParDo.MultiOutput} into a {@link ParDoPayload}.
 *
 * <p>The payload is assembled by {@code payloadForParDoLike} from an anonymous {@code ParDoLike}
 * that exposes the DoFn, its side inputs, state specs, timer family specs and the flags derived
 * from the DoFn's {@link DoFnSignature}.
 *
 * @param parDo the transform to translate
 * @param mainInput the main input of the ParDo; supplies the window coder and, for DoFns that use
 *     state or timers, the key coder
 * @param doFnSchemaInformation schema information forwarded to the DoFn translation
 * @param pipeline the pipeline whose coder registry is used to obtain the restriction and
 *     watermark estimator state coders of a splittable DoFn
 * @param components the components in which coders are registered
 * @return the translated payload
 * @throws IOException if coder registration or a nested translation fails
 * @throws IllegalArgumentException if the DoFn uses state or timers but the main input is not
 *     encoded with a {@link KvCoder}
 */
public static <InputT> ParDoPayload translateParDo(
    ParDo.MultiOutput<InputT, ?> parDo,
    PCollection<InputT> mainInput,
    DoFnSchemaInformation doFnSchemaInformation,
    Pipeline pipeline,
    SdkComponents components)
    throws IOException {
  final DoFn<?, ?> doFn = parDo.getFn();
  final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());

  // Splittable DoFns register a KV coder of (restriction, watermark estimator state); every
  // other DoFn reports an empty restriction coder id.
  final String restrictionCoderId;
  if (signature.processElement().isSplittable()) {
    DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(doFn);
    final Coder<?> restrictionAndWatermarkStateCoder =
        KvCoder.of(
            doFnInvoker.invokeGetRestrictionCoder(pipeline.getCoderRegistry()),
            doFnInvoker.invokeGetWatermarkEstimatorStateCoder(pipeline.getCoderRegistry()));
    restrictionCoderId = components.registerCoder(restrictionAndWatermarkStateCoder);
  } else {
    restrictionCoderId = "";
  }

  Coder<BoundedWindow> windowCoder =
      (Coder<BoundedWindow>) mainInput.getWindowingStrategy().getWindowFn().windowCoder();

  // The key coder is only extracted for DoFns that use state or timers, which must consume a
  // KV-encoded input; otherwise it stays null.
  Coder<?> keyCoder;
  if (signature.usesState() || signature.usesTimers()) {
    checkArgument(
        mainInput.getCoder() instanceof KvCoder,
        "DoFn's that use state or timers must have an input PCollection with a KvCoder but received %s",
        mainInput.getCoder());
    keyCoder = ((KvCoder) mainInput.getCoder()).getKeyCoder();
  } else {
    keyCoder = null;
  }

  return payloadForParDoLike(
      new ParDoLike() {
        @Override
        public FunctionSpec translateDoFn(SdkComponents newComponents) {
          return ParDoTranslation.translateDoFn(
              parDo.getFn(),
              parDo.getMainOutputTag(),
              parDo.getSideInputs(),
              doFnSchemaInformation,
              newComponents);
        }

        @Override
        public Map<String, SideInput> translateSideInputs(SdkComponents components) {
          // Keyed by the id of each side input's internal tag.
          Map<String, SideInput> sideInputs = new HashMap<>();
          for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
            sideInputs.put(
                sideInput.getTagInternal().getId(), translateView(sideInput, components));
          }
          return sideInputs;
        }

        @Override
        public Map<String, RunnerApi.StateSpec> translateStateSpecs(SdkComponents components)
            throws IOException {
          Map<String, RunnerApi.StateSpec> stateSpecs = new HashMap<>();
          for (Map.Entry<String, StateDeclaration> state :
              signature.stateDeclarations().entrySet()) {
            RunnerApi.StateSpec spec =
                translateStateSpec(getStateSpecOrThrow(state.getValue(), doFn), components);
            stateSpecs.put(state.getKey(), spec);
          }
          return stateSpecs;
        }

        @Override
        public ParDoLikeTimerFamilySpecs translateTimerFamilySpecs(SdkComponents newComponents) {
          Map<String, RunnerApi.TimerFamilySpec> timerFamilySpecs = new HashMap<>();
          for (Map.Entry<String, TimerDeclaration> timer :
              signature.timerDeclarations().entrySet()) {
            RunnerApi.TimerFamilySpec spec =
                translateTimerFamilySpec(
                    getTimerSpecOrThrow(timer.getValue(), doFn),
                    newComponents,
                    keyCoder,
                    windowCoder);
            timerFamilySpecs.put(timer.getKey(), spec);
          }
          for (Map.Entry<String, DoFnSignature.TimerFamilyDeclaration> timerFamily :
              signature.timerFamilyDeclarations().entrySet()) {
            RunnerApi.TimerFamilySpec spec =
                translateTimerFamilySpec(
                    DoFnSignatures.getTimerFamilySpecOrThrow(timerFamily.getValue(), doFn),
                    newComponents,
                    keyCoder,
                    windowCoder);
            timerFamilySpecs.put(timerFamily.getKey(), spec);
          }

          String onWindowExpirationTimerFamilySpec = null;
          if (signature.onWindowExpiration() != null) {
            // Register through the components handed to this method, exactly like the timer and
            // timer family specs above, rather than through the captured outer instance.
            // NOTE(review): keyCoder is null unless the signature uses state or timers; confirm
            // that an @OnWindowExpiration method always implies one of those.
            RunnerApi.TimerFamilySpec spec =
                RunnerApi.TimerFamilySpec.newBuilder()
                    .setTimeDomain(translateTimeDomain(TimeDomain.EVENT_TIME))
                    .setTimerFamilyCoderId(
                        registerCoderOrThrow(newComponents, Timer.Coder.of(keyCoder, windowCoder)))
                    .build();
            // Pick the first "onWindowExpiration<i>" id that does not collide with a declared
            // timer or timer family id.
            for (int i = 0; i < Integer.MAX_VALUE; ++i) {
              onWindowExpirationTimerFamilySpec = "onWindowExpiration" + i;
              if (!timerFamilySpecs.containsKey(onWindowExpirationTimerFamilySpec)) {
                break;
              }
            }
            timerFamilySpecs.put(onWindowExpirationTimerFamilySpec, spec);
          }
          return ParDoLikeTimerFamilySpecs.create(
              timerFamilySpecs, onWindowExpirationTimerFamilySpec);
        }

        @Override
        public boolean isStateful() {
          // Any state, timer, timer family or on-window-expiration declaration counts.
          return !signature.stateDeclarations().isEmpty()
              || !signature.timerDeclarations().isEmpty()
              || !signature.timerFamilyDeclarations().isEmpty()
              || signature.onWindowExpiration() != null;
        }

        @Override
        public boolean isSplittable() {
          return signature.processElement().isSplittable();
        }

        @Override
        public boolean isRequiresStableInput() {
          return signature.processElement().requiresStableInput();
        }

        @Override
        public boolean isRequiresTimeSortedInput() {
          return signature.processElement().requiresTimeSortedInput();
        }

        @Override
        public boolean requestsFinalization() {
          // True when start-bundle, process-element or finish-bundle takes a bundle finalizer.
          return (signature.startBundle() != null
                  && signature
                      .startBundle()
                      .extraParameters()
                      .contains(Parameter.bundleFinalizer()))
              || (signature.processElement() != null
                  && signature
                      .processElement()
                      .extraParameters()
                      .contains(Parameter.bundleFinalizer()))
              || (signature.finishBundle() != null
                  && signature
                      .finishBundle()
                      .extraParameters()
                      .contains(Parameter.bundleFinalizer()));
        }

        @Override
        public String translateRestrictionCoderId(SdkComponents newComponents) {
          // Computed eagerly above; empty for non-splittable DoFns.
          return restrictionCoderId;
        }
      },
      components);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class FusedPipeline method toPipeline.
/**
 * Converts this {@link FusedPipeline} into its {@link RunnerApi.Pipeline} form.
 *
 * <p>The {@link Components} of the result hold every {@link PTransform} of the components this
 * {@link FusedPipeline} carries, plus one transform for each {@link ExecutableStage} in this
 * {@link FusedPipeline}. {@link Pipeline#getRootTransformIdsList()} lists the runner executed
 * transforms together with the {@link ExecutableStage executable stages}, in topological order.
 *
 * <p>The result is run through {@link PipelineValidator} before it is returned.
 */
public RunnerApi.Pipeline toPipeline() {
  Map<String, PTransform> stageTransformsById = getEnvironmentExecutedTransforms();
  Set<String> runnerExecutedIds =
      getRunnerExecutedTransforms().stream()
          .map(PTransformNode::getId)
          .collect(Collectors.toSet());
  Set<String> executableIds = Sets.union(stageTransformsById.keySet(), runnerExecutedIds);

  // Start from the existing components and add the executable stage transforms.
  Components componentsWithStages =
      getComponents().toBuilder().putAllTransforms(stageTransformsById).build();

  // Root ids are the executable transforms in topological order.
  List<String> orderedRootIds =
      StreamSupport.stream(
              QueryablePipeline.forTransforms(executableIds, componentsWithStages)
                  .getTopologicallyOrderedTransforms()
                  .spliterator(),
              false)
          .map(PTransformNode::getId)
          .collect(Collectors.toList());

  Pipeline fusedPipeline =
      Pipeline.newBuilder()
          .setComponents(componentsWithStages)
          .addAllRootTransformIds(orderedRootIds)
          .addAllRequirements(getRequirements())
          .build();

  // Guard against fusion having produced a malformed pipeline.
  PipelineValidator.validate(fusedPipeline);
  return fusedPipeline;
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class GreedyStageFuser method getStageEnvironment.
/**
 * Returns the single {@link Environment} shared by all of the given nodes.
 *
 * @param pipeline the pipeline used to look up each node's environment
 * @param initialNodes the nodes whose common environment is requested; must not be empty
 * @return the environment every node in {@code initialNodes} resolves to
 * @throws IllegalArgumentException if {@code initialNodes} is empty, if any node has no
 *     environment, or if two nodes resolve to different environments
 */
private static Environment getStageEnvironment(
    QueryablePipeline pipeline, Set<PTransformNode> initialNodes) {
  // Fail with a descriptive message instead of a bare NoSuchElementException from next().
  checkArgument(
      !initialNodes.isEmpty(),
      "Cannot determine the %s of an empty set of %s",
      Environment.class.getSimpleName(),
      PTransformNode.class.getSimpleName());
  Supplier<IllegalArgumentException> missingEnv =
      () ->
          new IllegalArgumentException(
              String.format(
                  "%s must be populated on all %s in a %s",
                  Environment.class.getSimpleName(),
                  PTransformNode.class.getSimpleName(),
                  GreedyStageFuser.class.getSimpleName()));
  Environment env =
      pipeline.getEnvironment(initialNodes.iterator().next()).orElseThrow(missingEnv);
  for (PTransformNode transformNode : initialNodes) {
    // Resolve each node's environment once and reuse it for both the check and the message.
    Environment nodeEnv = pipeline.getEnvironment(transformNode).orElseThrow(missingEnv);
    checkArgument(
        env.equals(nodeEnv),
        "All %s in a %s must be the same. Got %s and %s",
        Environment.class.getSimpleName(),
        ExecutableStage.class.getSimpleName(),
        env,
        nodeEnv);
  }
  return env;
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class ExecutableStageTranslationTest method testOperatorNameGeneration.
/** Test for generating readable operator names during translation. */
@Test
public void testOperatorNameGeneration() throws Exception {
  Pipeline pipeline = Pipeline.create();
  pipeline
      .apply(Impulse.create())
      // Unnamed application of an anonymous DoFn.
      .apply(
          ParDo.of(
              new DoFn<byte[], String>() {
                @ProcessElement
                public void processElement(ProcessContext context, OutputReceiver<String> receiver) {
                }
              }))
      // Explicitly named application.
      .apply(
          "MyName",
          ParDo.of(
              new DoFn<String, Integer>() {
                @ProcessElement
                public void processElement(ProcessContext context, OutputReceiver<Integer> receiver) {
                }
              }))
      // Avoid nested Anonymous ParDo
      .apply(
          "Composite/Nested/ParDo",
          ParDo.of(
              new DoFn<Integer, Integer>() {
                @ProcessElement
                public void processElement(ProcessContext context, OutputReceiver<Integer> receiver) {
                }
              }));

  // Fuse the portable form of the pipeline and take the first fused stage.
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  ExecutableStage firstStage =
      GreedyPipelineFuser.fuse(pipelineProto).getFusedStages().stream().findFirst().get();

  RunnerApi.ExecutableStagePayload stagePayload =
      RunnerApi.ExecutableStagePayload.parseFrom(
          firstStage.toPTransform("foo").getSpec().getPayload());
  String generatedName = ExecutableStageTranslation.generateNameFromStagePayload(stagePayload);

  assertThat(generatedName, is("[3]{ParDo(Anonymous), MyName, Composite}"));
}
Aggregations