Search in sources :

Example 6 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method getMainInput.

/**
 * Looks up the main input {@link RunnerApi.PCollection} of a ParDo transform proto.
 *
 * <p>The main input is identified as the single local input name of the transform that is not
 * also a key of the payload's side-input map. {@code Iterables.getOnlyElement} is used for that
 * lookup, so the transform must have exactly one such input.
 *
 * @param ptransform the transform proto; its spec URN must equal {@code PAR_DO_TRANSFORM_URN}
 * @param components the components in which the main input's PCollection id is resolved
 * @return the PCollection registered in {@code components} under the main input's id
 * @throws IOException declared by this method; presumably raised when the payload cannot be
 *     unpacked (confirm against the protobuf version in use)
 */
public static RunnerApi.PCollection getMainInput(RunnerApi.PTransform ptransform, Components components) throws IOException {
    String urn = ptransform.getSpec().getUrn();
    checkArgument(urn.equals(PAR_DO_TRANSFORM_URN), "Unexpected payload type %s", urn);

    ParDoPayload parDoPayload = ptransform.getSpec().getParameter().unpack(ParDoPayload.class);

    // Whatever input name is left after removing the side-input names is the main input.
    String mainInputName =
        Iterables.getOnlyElement(
            Sets.difference(
                ptransform.getInputsMap().keySet(),
                parDoPayload.getSideInputsMap().keySet()));

    String mainInputPCollectionId = ptransform.getInputsOrThrow(mainInputName);
    return components.getPcollectionsOrThrow(mainInputPCollectionId);
}
Also used : ParDoPayload(org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload) ByteString(com.google.protobuf.ByteString)

Example 7 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method toProto.

/**
 * Builds the {@link ParDoPayload} proto for a multi-output {@link ParDo}.
 *
 * <p>The payload is populated, in this order, with: the translated {@link DoFn} together with
 * the main output tag, one side-input entry per {@link PCollectionView} keyed by the id of its
 * internal tag, every extra {@code processElement} parameter that has a proto representation,
 * one state spec per state declaration, and one timer spec per timer declaration.
 *
 * @param parDo the transform to translate
 * @param components passed through to the state-spec translation
 * @return the assembled payload
 * @throws IOException declared by this method; propagated from the nested translations
 */
public static ParDoPayload toProto(ParDo.MultiOutput<?, ?> parDo, SdkComponents components) throws IOException {
    DoFn<?, ?> fn = parDo.getFn();
    DoFnSignature fnSignature = DoFnSignatures.getSignature(fn.getClass());
    Map<String, StateDeclaration> stateDeclarations = fnSignature.stateDeclarations();
    Map<String, TimerDeclaration> timerDeclarations = fnSignature.timerDeclarations();
    List<Parameter> extraParameters = fnSignature.processElement().extraParameters();

    ParDoPayload.Builder payload =
        ParDoPayload.newBuilder().setDoFn(toProto(fn, parDo.getMainOutputTag()));

    for (PCollectionView<?> view : parDo.getSideInputs()) {
        String sideInputId = view.getTagInternal().getId();
        payload.putSideInputs(sideInputId, toProto(view));
    }

    for (Parameter extraParameter : extraParameters) {
        // Parameters for which the translation yields no proto are skipped.
        Optional<RunnerApi.Parameter> translated = toProto(extraParameter);
        if (translated.isPresent()) {
            payload.addParameters(translated.get());
        }
    }

    for (Map.Entry<String, StateDeclaration> declared : stateDeclarations.entrySet()) {
        RunnerApi.StateSpec stateSpec =
            toProto(getStateSpecOrCrash(declared.getValue(), fn), components);
        payload.putStateSpecs(declared.getKey(), stateSpec);
    }

    for (Map.Entry<String, TimerDeclaration> declared : timerDeclarations.entrySet()) {
        RunnerApi.TimerSpec timerSpec = toProto(getTimerSpecOrCrash(declared.getValue(), fn));
        payload.putTimerSpecs(declared.getKey(), timerSpec);
    }

    return payload.build();
}
Also used : ParDoPayload(org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload) ByteString(com.google.protobuf.ByteString) TimerDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerDeclaration) RunnerApi(org.apache.beam.sdk.common.runner.v1.RunnerApi) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) WindowParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.WindowParameter) RestrictionTrackerParameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter) Map(java.util.Map) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration)

Example 8 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class GreedyPCollectionFusers method canFuseParDo.

/**
 * Decides whether a ParDo can be fused into a stage.
 *
 * <p>A ParDo can be fused into a stage if it executes in the same Environment as that stage, and
 * no transforms that are upstream of any of its side inputs are present in that stage.
 *
 * <p>A ParDo that consumes a side input cannot process an element until all of the side inputs
 * contain data for the side input window that contains the element.
 *
 * <p>As implemented here, the checks run in order: a ParDo in a different environment never
 * fuses; a candidate PCollection that feeds one of the ParDo's timer-family inputs always fuses;
 * a ParDo with any state or timer-family specs otherwise does not fuse; and a ParDo with any
 * side inputs does not fuse.
 *
 * @param parDo the ParDo transform being considered for fusion
 * @param environment the environment of the stage
 * @param candidate the PCollection through which the ParDo would be fused
 * @param stagePCollections the PCollections of the stage (not consulted by this method)
 * @param pipeline used to look up the ParDo's environment and side inputs
 * @return {@code true} if the ParDo may be fused into the stage
 * @throws IllegalArgumentException if the ParDo has no environment, or its payload cannot be
 *     parsed as a {@link ParDoPayload}
 */
private static boolean canFuseParDo(PTransformNode parDo, Environment environment, PCollectionNode candidate, Collection<PCollectionNode> stagePCollections, QueryablePipeline pipeline) {
    Optional<Environment> parDoEnvironment = pipeline.getEnvironment(parDo);
    checkArgument(parDoEnvironment.isPresent(), "A %s must have an %s associated with it", ParDoPayload.class.getSimpleName(), Environment.class.getSimpleName());

    boolean sameEnvironment = parDoEnvironment.get().equals(environment);
    if (!sameEnvironment) {
        // The ParDo runs in a different environment than the stage, so fusion is never possible.
        return false;
    }

    try {
        ParDoPayload payload = ParDoPayload.parseFrom(parDo.getTransform().getSpec().getPayload());

        boolean candidateFeedsTimerInput =
            Maps.filterKeys(
                    parDo.getTransform().getInputsMap(),
                    inputName -> payload.getTimerFamilySpecsMap().containsKey(inputName))
                .values()
                .contains(candidate.getId());
        if (candidateFeedsTimerInput) {
            // Allow fusion across timer PCollections because they are a self loop.
            return true;
        }

        boolean usesStateOrTimers =
            payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0;
        if (usesStateOrTimers) {
            // ParDos that declare state or timers do not fuse into an existing stage.
            return false;
        }

        // A ParDo with side inputs is not fused here; it must not share an executable stage
        // with transforms upstream of any of its side inputs.
        return pipeline.getSideInputs(parDo).isEmpty();
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalArgumentException(e);
    }
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment)

Example 9 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method getAdditionalOutputTags.

/**
 * Returns the additional (non-main) output tags of an applied ParDo.
 *
 * <p>When the applied transform is a {@link ParDo.MultiOutput}, its own tag list is returned
 * directly. Otherwise the application is translated to its proto form and every output name
 * other than the main output tag's id is wrapped in a new {@link TupleTag}.
 *
 * @param application the applied transform to inspect
 * @return the additional output tags
 * @throws IOException declared by this method; propagated from proto translation or parsing
 */
public static TupleTagList getAdditionalOutputTags(AppliedPTransform<?, ?, ?> application) throws IOException {
    PTransform<?, ?> transform = application.getTransform();
    if (transform instanceof ParDo.MultiOutput) {
        ParDo.MultiOutput<?, ?> multiOutput = (ParDo.MultiOutput<?, ?>) transform;
        return multiOutput.getAdditionalOutputTags();
    }

    // Not a known ParDo type: go through the proto representation instead.
    RunnerApi.PTransform protoTransform =
        PTransformTranslation.toProto(
            application, SdkComponents.create(application.getPipeline().getOptions()));
    ParDoPayload payload = ParDoPayload.parseFrom(protoTransform.getSpec().getPayload());
    String mainOutputId = getMainOutputTag(payload).getId();

    ArrayList<TupleTag<?>> additionalTags = new ArrayList<>();
    for (String outputName :
        Sets.difference(
            protoTransform.getOutputsMap().keySet(), Collections.singleton(mainOutputId))) {
        additionalTags.add(new TupleTag<>(outputName));
    }
    return TupleTagList.of(additionalTags);
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput)

Example 10 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method getSideInputs.

/**
 * Returns the side-input views of an applied ParDo.
 *
 * <p>When the applied transform is a {@link ParDo.MultiOutput}, a list copy of its side-input
 * views is returned. Otherwise the application is translated to its proto form and one view is
 * rebuilt per side-input entry of the payload, using the application input registered under a
 * {@link TupleTag} with the side input's tag.
 *
 * @param application the applied transform to inspect
 * @return the side-input views, as a new list
 * @throws IOException declared by this method; propagated from proto translation or parsing
 * @throws NullPointerException if a side input in the payload has no matching application input
 */
public static List<PCollectionView<?>> getSideInputs(AppliedPTransform<?, ?, ?> application) throws IOException {
    PTransform<?, ?> transform = application.getTransform();
    if (transform instanceof ParDo.MultiOutput) {
        ParDo.MultiOutput<?, ?> multiOutput = (ParDo.MultiOutput<?, ?>) transform;
        return new ArrayList<>(multiOutput.getSideInputs().values());
    }

    SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions());
    RunnerApi.PTransform parDoProto = PTransformTranslation.toProto(application, sdkComponents);
    ParDoPayload payload = ParDoPayload.parseFrom(parDoProto.getSpec().getPayload());
    RehydratedComponents rehydrated = RehydratedComponents.forComponents(sdkComponents.toComponents());

    List<PCollectionView<?>> result = new ArrayList<>();
    for (Map.Entry<String, SideInput> entry : payload.getSideInputsMap().entrySet()) {
        String tag = entry.getKey();
        RunnerApi.SideInput sideInputProto = entry.getValue();

        // The side input's PCollection is the application input registered under the same tag.
        PCollection<?> sourcePCollection = checkNotNull((PCollection<?>) application.getInputs().get(new TupleTag<>(tag)), "no input with tag %s", tag);

        PCollectionView<?> view =
            PCollectionViewTranslation.viewFromProto(
                sideInputProto, tag, sourcePCollection, parDoProto, rehydrated);
        result.add(view);
    }
    return result;
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PCollectionView(org.apache.beam.sdk.values.PCollectionView) ParDo(org.apache.beam.sdk.transforms.ParDo) Map(java.util.Map) HashMap(java.util.HashMap) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput)

Aggregations

ParDoPayload (org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)10 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)6 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 ByteString (com.google.protobuf.ByteString)2 IOException (java.io.IOException)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Set (java.util.Set)2 PTransformRunnerFactory (org.apache.beam.fn.harness.PTransformRunnerFactory)2 BundleProcessorCache (org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache)2 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)2 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)2 Pipeline (org.apache.beam.sdk.Pipeline)2 ParDoPayload (org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload)2 MultiOutput (org.apache.beam.sdk.transforms.ParDo.MultiOutput)2 DoFnWithExecutionInformation (org.apache.beam.sdk.util.DoFnWithExecutionInformation)2