Search in sources :

Example 36 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class FusedPipeline method toPipeline.

/**
 * Converts this {@link FusedPipeline} into its {@link RunnerApi.Pipeline} form.
 *
 * <p>The returned pipeline's {@link Components} hold every {@link PTransform} of the components
 * this {@link FusedPipeline} was built from, with the environment-executed transforms (the {@link
 * ExecutableStage ExecutableStages}) added on top. The {@link Pipeline#getRootTransformIdsList()
 * root transform ids} are the runner-executed transforms together with the executable stages,
 * listed in topological order.
 *
 * <p>The assembled pipeline is handed to {@link PipelineValidator#validate} before it is
 * returned.
 */
public RunnerApi.Pipeline toPipeline() {
    Map<String, PTransform> stageTransforms = getEnvironmentExecutedTransforms();
    Set<String> runnerTransformIds =
        getRunnerExecutedTransforms().stream()
            .map(PTransformNode::getId)
            .collect(Collectors.toSet());
    // Everything that is executed directly: the fused stages plus the runner's own transforms.
    Set<String> executableIds = Sets.union(stageTransforms.keySet(), runnerTransformIds);

    // Start from the existing components and layer the executable stage transforms on top.
    Components fusedComponents =
        getComponents().toBuilder().putAllTransforms(stageTransforms).build();

    // Root ids are emitted in topological order of the executable transforms.
    QueryablePipeline executableView =
        QueryablePipeline.forTransforms(executableIds, fusedComponents);
    Iterable<PTransformNode> orderedNodes = executableView.getTopologicallyOrderedTransforms();
    List<String> rootTransformIds =
        StreamSupport.stream(orderedNodes.spliterator(), false)
            .map(PTransformNode::getId)
            .collect(Collectors.toList());

    Pipeline fused =
        Pipeline.newBuilder()
            .setComponents(fusedComponents)
            .addAllRootTransformIds(rootTransformIds)
            .addAllRequirements(getRequirements())
            .build();
    // Guard against fusion having produced a malformed pipeline.
    PipelineValidator.validate(fused);
    return fused;
}
Also used : SyntheticComponents(org.apache.beam.runners.core.construction.SyntheticComponents) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline)

Example 37 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class Environments method getEnvironment.

/**
 * Looks up the {@link Environment} referenced by the given transform.
 *
 * @param ptransformId id of the transform to inspect; it is resolved with {@code
 *     getTransformsOrThrow}, so it must be present in {@code components}
 * @param components the components holding both the transform and its environment
 * @return the transform's environment, or {@link Optional#empty()} when the transform's
 *     environment id is null or empty
 */
public static Optional<Environment> getEnvironment(String ptransformId, Components components) {
    String environmentId = components.getTransformsOrThrow(ptransformId).getEnvironmentId();
    // A transform without an environment id has no environment to report.
    if (Strings.isNullOrEmpty(environmentId)) {
        return Optional.empty();
    }
    return Optional.of(components.getEnvironmentsOrThrow(environmentId));
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 38 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class PipelineValidator method validateParDo.

/**
 * Validates the {@link ParDoPayload} carried by a ParDo transform.
 *
 * <p>Checks that every side input named in the payload is also one of the transform's inputs,
 * that a restriction coder (if set) exists in {@code components}, and that each optional feature
 * the payload uses (state/timers, splittable DoFn, bundle finalization, stable input, time-sorted
 * input) is backed by the matching URN in {@code requirements}.
 *
 * @param id id of the transform being validated; used only in error messages
 * @param transform the transform whose spec payload is parsed as a {@link ParDoPayload}
 * @param components components used to resolve the restriction coder id
 * @param requirements the requirement URNs declared by the pipeline
 * @throws IllegalArgumentException if any check fails (assuming {@code checkArgument} is Guava's
 *     {@code Preconditions.checkArgument})
 * @throws Exception if the payload cannot be parsed (propagated from {@code
 *     ParDoPayload.parseFrom})
 */
private static void validateParDo(String id, PTransform transform, Components components, Set<String> requirements) throws Exception {
    ParDoPayload payload = ParDoPayload.parseFrom(transform.getSpec().getPayload());
    // side_inputs
    for (String sideInputId : payload.getSideInputsMap().keySet()) {
        checkArgument(transform.containsInputs(sideInputId), "Transform %s side input %s is not listed in the transform's inputs", id, sideInputId);
    }
    if (payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0) {
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN), "Transform %s uses state or timers but the pipeline does not declare requirement %s", id, ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN);
        // TODO: Validate state_specs and timer_specs
    }
    if (!payload.getRestrictionCoderId().isEmpty()) {
        checkArgument(components.containsCoders(payload.getRestrictionCoderId()), "Transform %s uses unknown restriction coder id %s", id, payload.getRestrictionCoderId());
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_SPLITTABLE_DOFN_URN), "Transform %s has a restriction coder but the pipeline does not declare requirement %s", id, ParDoTranslation.REQUIRES_SPLITTABLE_DOFN_URN);
    }
    if (payload.getRequestsFinalization()) {
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_BUNDLE_FINALIZATION_URN), "Transform %s requests finalization but the pipeline does not declare requirement %s", id, ParDoTranslation.REQUIRES_BUNDLE_FINALIZATION_URN);
    }
    if (payload.getRequiresStableInput()) {
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_STABLE_INPUT_URN), "Transform %s requires stable input but the pipeline does not declare requirement %s", id, ParDoTranslation.REQUIRES_STABLE_INPUT_URN);
    }
    if (payload.getRequiresTimeSortedInput()) {
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_TIME_SORTED_INPUT_URN), "Transform %s requires time sorted input but the pipeline does not declare requirement %s", id, ParDoTranslation.REQUIRES_TIME_SORTED_INPUT_URN);
    }
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)

Example 39 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class PipelineValidator method validateComponents.

/**
 * Checks a {@link Components} message for internal consistency.
 *
 * <p>In order, this verifies that:
 *
 * <ul>
 *   <li>no two transforms share a unique_name (each transform is also passed to {@code
 *       validateTransform});
 *   <li>every PCollection has a non-empty unique_name not shared with another PCollection, and
 *       refers to a known coder and a known windowing strategy;
 *   <li>every windowing strategy refers to a known window coder;
 *   <li>every coder's component coders are known.
 * </ul>
 *
 * @param context prefix placed at the front of every error message
 * @param components the components to validate
 * @param requirements requirement URNs, forwarded to {@code validateTransform}
 */
private static void validateComponents(String context, Components components, Set<String> requirements) {
    // Transforms. An empty unique_name is tolerated, but since it is tracked like any other name
    // at most one transform can carry it (e.g. the single root transform).
    Map<String, String> transformIdByName = Maps.newHashMap();
    for (Map.Entry<String, PTransform> entry : components.getTransformsMap().entrySet()) {
        String transformId = entry.getKey();
        PTransform transform = entry.getValue();
        String clashingId = transformIdByName.put(transform.getUniqueName(), transformId);
        checkArgument(
            clashingId == null,
            "%s: Transforms %s and %s both have unique_name \"%s\"",
            context,
            transformId,
            clashingId,
            transform.getUniqueName());
        validateTransform(transformId, transform, components, requirements);
    }

    // PCollections. Unlike transforms, a unique_name is mandatory here.
    Map<String, String> pcollectionIdByName = Maps.newHashMap();
    for (Map.Entry<String, PCollection> entry : components.getPcollectionsMap().entrySet()) {
        String pcollectionId = entry.getKey();
        PCollection pcollection = entry.getValue();
        String name = pcollection.getUniqueName();
        checkArgument(
            !name.isEmpty(),
            "%s: PCollection %s does not have a unique_name set",
            context,
            pcollectionId);
        String clashingId = pcollectionIdByName.put(name, pcollectionId);
        checkArgument(
            clashingId == null,
            "%s: PCollections %s and %s both have unique_name \"%s\"",
            context,
            pcollectionId,
            clashingId,
            name);
        String coderId = pcollection.getCoderId();
        checkArgument(
            components.containsCoders(coderId),
            "%s: PCollection %s uses unknown coder %s",
            context,
            pcollectionId,
            coderId);
        String windowingStrategyId = pcollection.getWindowingStrategyId();
        checkArgument(
            components.containsWindowingStrategies(windowingStrategyId),
            "%s: PCollection %s uses unknown windowing strategy %s",
            context,
            pcollectionId,
            windowingStrategyId);
    }

    // Windowing strategies must point at a known window coder.
    for (Map.Entry<String, WindowingStrategy> entry : components.getWindowingStrategiesMap().entrySet()) {
        String windowCoderId = entry.getValue().getWindowCoderId();
        checkArgument(
            components.containsCoders(windowCoderId),
            "%s: WindowingStrategy %s uses unknown coder %s",
            context,
            entry.getKey(),
            windowCoderId);
    }

    // Coders may only be composed of coders that are themselves present.
    components
        .getCodersMap()
        .forEach(
            (coderId, coder) -> {
                for (String componentCoderId : coder.getComponentCoderIdsList()) {
                    checkArgument(
                        components.containsCoders(componentCoderId),
                        "%s: Coder %s uses unknown component coder %s",
                        context,
                        coderId,
                        componentCoderId);
                }
            });
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) WindowingStrategy(org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 40 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class PipelineValidator method validateCombine.

/**
 * Validates the {@link CombinePayload} of a combine transform by checking that its accumulator
 * coder id refers to a coder present in {@code components}.
 *
 * @param id id of the transform being validated; used only in the error message
 * @param transform the transform whose spec payload is parsed as a {@link CombinePayload}
 * @param components components used to resolve the accumulator coder id
 * @param requirements requirement URNs; not consulted by this check
 * @throws Exception if the payload cannot be parsed (propagated from {@code
 *     CombinePayload.parseFrom})
 */
private static void validateCombine(String id, PTransform transform, Components components, Set<String> requirements) throws Exception {
    String accumulatorCoderId =
        CombinePayload.parseFrom(transform.getSpec().getPayload()).getAccumulatorCoderId();
    boolean accumulatorCoderKnown = components.containsCoders(accumulatorCoderId);
    checkArgument(
        accumulatorCoderKnown,
        "Transform %s uses unknown accumulator coder id %s",
        id,
        accumulatorCoderId);
}
Also used : CombinePayload(org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload)

Aggregations

Test (org.junit.Test)55 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)49 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)40 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)31 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)20 Map (java.util.Map)16 WindowedValue (org.apache.beam.sdk.util.WindowedValue)16 IOException (java.io.IOException)15 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)15 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)14 Coder (org.apache.beam.sdk.coders.Coder)14 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 Pipeline (org.apache.beam.sdk.Pipeline)13 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)12 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)11 KvCoder (org.apache.beam.sdk.coders.KvCoder)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)10