use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ParDoTranslation method getMainInput.
/**
 * Resolves the main-input {@link RunnerApi.PCollection} of a ParDo transform.
 *
 * <p>The main input is the one local input name of {@code ptransform} that is not also a key of
 * the payload's side-input map. {@code Iterables.getOnlyElement} is used for the lookup, so the
 * call fails unless exactly one such name exists.
 *
 * @param ptransform the transform to inspect; its spec URN must equal {@code
 *     PAR_DO_TRANSFORM_URN}
 * @param components the components from which the main input's PCollection is fetched
 * @return the PCollection registered in {@code components} under the main input's id
 * @throws IOException declared by this method; presumably raised when the payload cannot be
 *     unpacked
 */
public static RunnerApi.PCollection getMainInput(
    RunnerApi.PTransform ptransform, Components components) throws IOException {
  String urn = ptransform.getSpec().getUrn();
  checkArgument(urn.equals(PAR_DO_TRANSFORM_URN), "Unexpected payload type %s", urn);

  ParDoPayload parDoPayload = ptransform.getSpec().getParameter().unpack(ParDoPayload.class);

  // Every input that is not a side input is a main-input candidate; exactly one is expected.
  String mainInputName =
      Iterables.getOnlyElement(
          Sets.difference(
              ptransform.getInputsMap().keySet(), parDoPayload.getSideInputsMap().keySet()));

  // The inputs map goes from the transform-local name to the id used inside the components.
  String mainInputPCollectionId = ptransform.getInputsOrThrow(mainInputName);
  return components.getPcollectionsOrThrow(mainInputPCollectionId);
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ParDoTranslation method toProto.
/**
 * Builds the {@link ParDoPayload} proto describing a multi-output {@link ParDo}.
 *
 * <p>The payload is populated with, in order: the translated {@link DoFn} together with the main
 * output tag, one side-input entry per view keyed by the view's internal tag id, every extra
 * {@code processElement} parameter that has a proto representation, and the state and timer
 * specs declared in the {@link DoFn}'s signature.
 *
 * @param parDo the transform to translate
 * @param components passed to the state-spec translation
 * @return the assembled payload
 * @throws IOException declared by this method; presumably propagated from the nested
 *     translations
 */
public static ParDoPayload toProto(ParDo.MultiOutput<?, ?> parDo, SdkComponents components)
    throws IOException {
  DoFn<?, ?> fn = parDo.getFn();
  DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());

  ParDoPayload.Builder payload = ParDoPayload.newBuilder();
  payload.setDoFn(toProto(parDo.getFn(), parDo.getMainOutputTag()));

  for (PCollectionView<?> view : parDo.getSideInputs()) {
    payload.putSideInputs(view.getTagInternal().getId(), toProto(view));
  }

  for (Parameter extraParameter : fnSignature.processElement().extraParameters()) {
    Optional<RunnerApi.Parameter> translated = toProto(extraParameter);
    if (!translated.isPresent()) {
      // Parameters without a proto representation are left out of the payload.
      continue;
    }
    payload.addParameters(translated.get());
  }

  for (Map.Entry<String, StateDeclaration> declaredState :
      fnSignature.stateDeclarations().entrySet()) {
    payload.putStateSpecs(
        declaredState.getKey(),
        toProto(getStateSpecOrCrash(declaredState.getValue(), fn), components));
  }

  for (Map.Entry<String, TimerDeclaration> declaredTimer :
      fnSignature.timerDeclarations().entrySet()) {
    payload.putTimerSpecs(
        declaredTimer.getKey(), toProto(getTimerSpecOrCrash(declaredTimer.getValue(), fn)));
  }

  return payload.build();
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class GreedyPCollectionFusers method canFuseParDo.
/**
 * Decides whether a ParDo may be fused into a stage by consuming {@code candidate}.
 *
 * <p>A ParDo can be fused into a stage only if it executes in the same {@link Environment} as
 * that stage, and no transforms that are upstream of any of its side inputs are present in that
 * stage. This implementation checks the second condition conservatively: any ParDo that consumes
 * at least one side input is refused.
 *
 * <p>A ParDo that consumes a side input cannot process an element until all of its side inputs
 * contain data for the side input window that contains the element.
 *
 * @param parDo the ParDo transform being considered for fusion
 * @param environment the environment of the stage
 * @param candidate the PCollection through which the ParDo would be fused
 * @param stagePCollections the PCollections already in the stage (not consulted by this method)
 * @param pipeline used to look up the ParDo's environment and side inputs
 * @return {@code true} if the ParDo may be fused, {@code false} otherwise
 * @throws IllegalArgumentException if the ParDo has no environment or its payload cannot be
 *     parsed
 */
private static boolean canFuseParDo(
    PTransformNode parDo,
    Environment environment,
    PCollectionNode candidate,
    Collection<PCollectionNode> stagePCollections,
    QueryablePipeline pipeline) {
  Optional<Environment> parDoEnvironment = pipeline.getEnvironment(parDo);
  checkArgument(
      parDoEnvironment.isPresent(),
      "A %s must have an %s associated with it",
      ParDoPayload.class.getSimpleName(),
      Environment.class.getSimpleName());
  if (!parDoEnvironment.get().equals(environment)) {
    // The ParDo and the stage execute in different environments, so fusion is never possible.
    return false;
  }

  try {
    ParDoPayload payload = ParDoPayload.parseFrom(parDo.getTransform().getSpec().getPayload());

    boolean candidateFeedsTimerInput =
        Maps.filterKeys(
                parDo.getTransform().getInputsMap(),
                inputName -> payload.getTimerFamilySpecsMap().containsKey(inputName))
            .values()
            .contains(candidate.getId());
    if (candidateFeedsTimerInput) {
      // Allow fusion across timer PCollections because they are a self loop.
      return true;
    }

    boolean usesStateOrTimers =
        payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0;
    if (usesStateOrTimers) {
      // ParDos that declare state or timer families do not fuse into an existing stage.
      // NOTE(review): presumably because the runner must first key-partition their input
      // while preserving keys - confirm.
      return false;
    }

    // A ParDo that consumes side inputs cannot be fused into an executable stage alongside any
    // transforms which are upstream of any of its side inputs. Fusion is simply refused
    // whenever a side input exists.
    return pipeline.getSideInputs(parDo).isEmpty();
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalArgumentException(e);
  }
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ParDoTranslation method getAdditionalOutputTags.
/**
 * Returns the additional (non-main) output tags of an applied ParDo.
 *
 * <p>When the applied transform is a {@link ParDo.MultiOutput}, its own additional output tags
 * are returned directly. Otherwise the application is translated to its proto form, and a new
 * {@link TupleTag} is created for every output key of the proto transform whose name differs
 * from the id of the payload's main output tag.
 *
 * @param application the applied transform to inspect
 * @return the list of additional output tags
 * @throws IOException declared by this method; presumably raised when translating the
 *     application or parsing its payload fails
 */
public static TupleTagList getAdditionalOutputTags(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> appliedTransform = application.getTransform();
  if (appliedTransform instanceof ParDo.MultiOutput) {
    ParDo.MultiOutput<?, ?> parDo = (ParDo.MultiOutput<?, ?>) appliedTransform;
    return parDo.getAdditionalOutputTags();
  }

  SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions());
  RunnerApi.PTransform parDoProto = PTransformTranslation.toProto(application, sdkComponents);
  ParDoPayload payload = ParDoPayload.parseFrom(parDoProto.getSpec().getPayload());
  String mainOutputId = getMainOutputTag(payload).getId();

  // Every output of the proto transform except the main output is an additional output.
  List<TupleTag<?>> additionalTags = new ArrayList<>();
  for (String outputId : parDoProto.getOutputsMap().keySet()) {
    if (!outputId.equals(mainOutputId)) {
      additionalTags.add(new TupleTag<>(outputId));
    }
  }
  return TupleTagList.of(additionalTags);
}
use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ParDoTranslation method getSideInputs.
/**
 * Returns the side-input views consumed by an applied ParDo.
 *
 * <p>When the applied transform is a {@link ParDo.MultiOutput}, the values of its side-input map
 * are copied into a new list. Otherwise the application is translated to its proto form and one
 * view is rebuilt per entry of the payload's side-input map through {@code
 * PCollectionViewTranslation.viewFromProto}, using the application input registered under the
 * same tag as the view's original PCollection.
 *
 * @param application the applied transform to inspect
 * @return a new list holding the side-input views
 * @throws IOException declared by this method; presumably raised when translation or payload
 *     parsing fails
 */
public static List<PCollectionView<?>> getSideInputs(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> appliedTransform = application.getTransform();
  if (appliedTransform instanceof ParDo.MultiOutput) {
    ParDo.MultiOutput<?, ?> parDo = (ParDo.MultiOutput<?, ?>) appliedTransform;
    return new ArrayList<>(parDo.getSideInputs().values());
  }

  SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions());
  RunnerApi.PTransform parDoProto = PTransformTranslation.toProto(application, sdkComponents);
  ParDoPayload payload = ParDoPayload.parseFrom(parDoProto.getSpec().getPayload());
  RehydratedComponents components =
      RehydratedComponents.forComponents(sdkComponents.toComponents());

  List<PCollectionView<?>> rebuiltViews = new ArrayList<>();
  for (Map.Entry<String, SideInput> entry : payload.getSideInputsMap().entrySet()) {
    String tagId = entry.getKey();
    RunnerApi.SideInput sideInputProto = entry.getValue();

    // The side input must also be registered as an input of the application under its tag.
    PCollection<?> sourcePCollection =
        (PCollection<?>) application.getInputs().get(new TupleTag<>(tagId));
    checkNotNull(sourcePCollection, "no input with tag %s", tagId);

    rebuiltViews.add(
        PCollectionViewTranslation.viewFromProto(
            sideInputProto, tagId, sourcePCollection, parDoProto, components));
  }
  return rebuiltViews;
}
Aggregations