use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class ExecutableStage method fromPayload.
/**
 * Rebuilds an {@link ExecutableStage} from its {@link ExecutableStagePayload} proto form.
 *
 * <p>See {@link #toPTransform} for how such a payload is produced.
 *
 * <p>Every id carried by the payload (input, side inputs, user states, timers, transforms and
 * outputs) is resolved against the {@link Components} embedded in the payload itself, so the
 * payload alone is enough to reconstruct the stage. The input, transform and output ids are
 * looked up with the {@code ...OrThrow} accessors, so an id that is absent from the embedded
 * components makes this method fail instead of yielding a partially built stage.
 *
 * @param payload the serialized stage description
 * @return the stage described by {@code payload}
 */
static ExecutableStage fromPayload(ExecutableStagePayload payload) {
  Components stageComponents = payload.getComponents();
  Environment stageEnvironment = payload.getEnvironment();
  Collection<WireCoderSetting> coderSettings = payload.getWireCoderSettingsList();

  // The single main-input PCollection of the stage.
  String inputId = payload.getInput();
  PCollectionNode inputNode =
      PipelineNode.pCollection(inputId, stageComponents.getPcollectionsOrThrow(inputId));

  List<SideInputReference> sideInputRefs =
      payload.getSideInputsList().stream()
          .map(ref -> SideInputReference.fromSideInputId(ref, stageComponents))
          .collect(Collectors.toList());

  List<UserStateReference> userStateRefs =
      payload.getUserStatesList().stream()
          .map(ref -> UserStateReference.fromUserStateId(ref, stageComponents))
          .collect(Collectors.toList());

  List<TimerReference> timerRefs =
      payload.getTimersList().stream()
          .map(ref -> TimerReference.fromTimerId(ref, stageComponents))
          .collect(Collectors.toList());

  List<PTransformNode> transformNodes =
      payload.getTransformsList().stream()
          .map(
              transformId ->
                  PipelineNode.pTransform(
                      transformId, stageComponents.getTransformsOrThrow(transformId)))
          .collect(Collectors.toList());

  List<PCollectionNode> outputNodes =
      payload.getOutputsList().stream()
          .map(
              outputId ->
                  PipelineNode.pCollection(
                      outputId, stageComponents.getPcollectionsOrThrow(outputId)))
          .collect(Collectors.toList());

  return ImmutableExecutableStage.of(
      stageComponents,
      stageEnvironment,
      inputNode,
      sideInputRefs,
      userStateRefs,
      timerRefs,
      transformNodes,
      outputNodes,
      coderSettings);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class FusedPipeline method toPipeline.
/**
 * Converts this {@link FusedPipeline} into its {@link RunnerApi.Pipeline} proto form.
 *
 * <p>The returned pipeline's {@link Components} are this pipeline's components with every
 * environment-executed (fused {@link ExecutableStage}) transform added. Its root transform ids
 * are the ids of the runner-executed transforms together with those of the executable stages,
 * listed in topological order. The requirements of this pipeline are carried over.
 *
 * <p>The result is passed through {@link PipelineValidator#validate} before being returned, so a
 * malformed fusion result surfaces here rather than later.
 *
 * @return the validated pipeline proto
 */
public RunnerApi.Pipeline toPipeline() {
  Map<String, PTransform> stageTransformsById = getEnvironmentExecutedTransforms();

  // Root ids = fused stage ids + ids of the transforms the runner executes itself.
  Set<String> stageTransformIds = stageTransformsById.keySet();
  Set<String> runnerTransformIds =
      getRunnerExecutedTransforms().stream()
          .map(PTransformNode::getId)
          .collect(Collectors.toSet());
  Set<String> allExecutableIds = Sets.union(stageTransformIds, runnerTransformIds);

  // Start from the existing components and add the executable-stage transforms on top.
  Components mergedComponents =
      getComponents().toBuilder().putAllTransforms(stageTransformsById).build();

  List<String> orderedRootIds =
      StreamSupport.stream(
              QueryablePipeline.forTransforms(allExecutableIds, mergedComponents)
                  .getTopologicallyOrderedTransforms()
                  .spliterator(),
              false)
          .map(PTransformNode::getId)
          .collect(Collectors.toList());

  Pipeline fusedProto =
      Pipeline.newBuilder()
          .setComponents(mergedComponents)
          .addAllRootTransformIds(orderedRootIds)
          .addAllRequirements(getRequirements())
          .build();

  // Guard against fusion having produced a malformed pipeline.
  PipelineValidator.validate(fusedProto);
  return fusedProto;
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class Environments method getEnvironment.
/**
 * Looks up the {@link Environment} that the given transform is assigned to.
 *
 * @param ptransformId id of the transform; resolved with {@code getTransformsOrThrow}, so it must
 *     be present in {@code components}
 * @param components the components holding both the transform and its environment
 * @return the transform's environment, or {@link Optional#empty()} when the transform has no
 *     environment id set
 */
public static Optional<Environment> getEnvironment(String ptransformId, Components components) {
  String environmentId = components.getTransformsOrThrow(ptransformId).getEnvironmentId();
  if (!Strings.isNullOrEmpty(environmentId)) {
    return Optional.of(components.getEnvironmentsOrThrow(environmentId));
  }
  // Not every transform carries an environment id (a GroupByKey payload is cited as one such
  // case), so absence is reported as an empty Optional rather than as an error.
  return Optional.empty();
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class PipelineValidator method validateParDo.
/**
 * Validates a ParDo transform against its own inputs, the pipeline components and the declared
 * pipeline requirements.
 *
 * <p>The checks are:
 *
 * <ul>
 *   <li>every side input named in the payload is also one of the transform's inputs;
 *   <li>a restriction coder, when set, exists in {@code components};
 *   <li>each optional ParDo feature used by the payload (state or timers, splittable DoFn,
 *       bundle finalization, stable input, time-sorted input) has its requirement URN listed in
 *       {@code requirements}.
 * </ul>
 *
 * <p>Each failed check reports the transform id and the missing URN or coder id, so that a
 * validation failure can be traced back to the offending transform.
 *
 * @param id id of the transform being validated; used in error messages only
 * @param transform the ParDo transform whose spec payload is parsed as a {@link ParDoPayload}
 * @param components components used to resolve the restriction coder id
 * @param requirements requirement URNs declared by the pipeline
 * @throws IllegalArgumentException if any of the checks above fails
 * @throws Exception if the transform's payload cannot be parsed as a {@link ParDoPayload}
 */
private static void validateParDo(String id, PTransform transform, Components components, Set<String> requirements) throws Exception {
  ParDoPayload payload = ParDoPayload.parseFrom(transform.getSpec().getPayload());
  // side_inputs
  for (String sideInputId : payload.getSideInputsMap().keySet()) {
    checkArgument(
        transform.containsInputs(sideInputId),
        "Transform %s side input %s is not listed in the transform's inputs",
        id,
        sideInputId);
  }
  if (payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0) {
    checkArgument(
        requirements.contains(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN),
        "Transform %s uses state or timers but the pipeline does not declare requirement %s",
        id,
        ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN);
    // TODO: Validate state_specs and timer_specs
  }
  if (!payload.getRestrictionCoderId().isEmpty()) {
    checkArgument(
        components.containsCoders(payload.getRestrictionCoderId()),
        "Transform %s uses unknown restriction coder %s",
        id,
        payload.getRestrictionCoderId());
    checkArgument(
        requirements.contains(ParDoTranslation.REQUIRES_SPLITTABLE_DOFN_URN),
        "Transform %s is splittable but the pipeline does not declare requirement %s",
        id,
        ParDoTranslation.REQUIRES_SPLITTABLE_DOFN_URN);
  }
  if (payload.getRequestsFinalization()) {
    checkArgument(
        requirements.contains(ParDoTranslation.REQUIRES_BUNDLE_FINALIZATION_URN),
        "Transform %s requests bundle finalization but the pipeline does not declare requirement %s",
        id,
        ParDoTranslation.REQUIRES_BUNDLE_FINALIZATION_URN);
  }
  if (payload.getRequiresStableInput()) {
    checkArgument(
        requirements.contains(ParDoTranslation.REQUIRES_STABLE_INPUT_URN),
        "Transform %s requires stable input but the pipeline does not declare requirement %s",
        id,
        ParDoTranslation.REQUIRES_STABLE_INPUT_URN);
  }
  if (payload.getRequiresTimeSortedInput()) {
    checkArgument(
        requirements.contains(ParDoTranslation.REQUIRES_TIME_SORTED_INPUT_URN),
        "Transform %s requires time-sorted input but the pipeline does not declare requirement %s",
        id,
        ParDoTranslation.REQUIRES_TIME_SORTED_INPUT_URN);
  }
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class PipelineValidator method validateComponents.
/**
 * Checks that a {@link Components} message is internally consistent.
 *
 * <p>Specifically:
 *
 * <ul>
 *   <li>no two transforms share a {@code unique_name}, and each transform passes {@code
 *       validateTransform};
 *   <li>every PCollection has a non-empty {@code unique_name} not shared with another
 *       PCollection, and refers to a coder and a windowing strategy that both exist;
 *   <li>every windowing strategy refers to an existing window coder;
 *   <li>every component coder id of every coder exists.
 * </ul>
 *
 * @param context prefix placed at the start of each error message
 * @param components the components to check
 * @param requirements requirement URNs, forwarded to {@code validateTransform}
 * @throws IllegalArgumentException on the first violated check
 */
private static void validateComponents(String context, Components components, Set<String> requirements) {
  // Transforms: unique names must not collide. An empty unique_name is tolerated (e.g. for the
  // sole root transform), but since it is tracked like any other name at most one transform may
  // leave it unset.
  Map<String, String> transformIdByName = Maps.newHashMap();
  for (Map.Entry<String, PTransform> entry : components.getTransformsMap().entrySet()) {
    String transformId = entry.getKey();
    PTransform transform = entry.getValue();
    String clashingId = transformIdByName.put(transform.getUniqueName(), transformId);
    checkArgument(
        clashingId == null,
        "%s: Transforms %s and %s both have unique_name \"%s\"",
        context,
        transformId,
        clashingId,
        transform.getUniqueName());
    validateTransform(transformId, transform, components, requirements);
  }

  // PCollections: a name is mandatory and unique; referenced coder and strategy must exist.
  Map<String, String> pcollectionIdByName = Maps.newHashMap();
  for (Map.Entry<String, PCollection> entry : components.getPcollectionsMap().entrySet()) {
    String pcollectionId = entry.getKey();
    PCollection pcollection = entry.getValue();
    checkArgument(
        !pcollection.getUniqueName().isEmpty(),
        "%s: PCollection %s does not have a unique_name set",
        context,
        pcollectionId);
    String clashingId = pcollectionIdByName.put(pcollection.getUniqueName(), pcollectionId);
    checkArgument(
        clashingId == null,
        "%s: PCollections %s and %s both have unique_name \"%s\"",
        context,
        pcollectionId,
        clashingId,
        pcollection.getUniqueName());
    checkArgument(
        components.containsCoders(pcollection.getCoderId()),
        "%s: PCollection %s uses unknown coder %s",
        context,
        pcollectionId,
        pcollection.getCoderId());
    checkArgument(
        components.containsWindowingStrategies(pcollection.getWindowingStrategyId()),
        "%s: PCollection %s uses unknown windowing strategy %s",
        context,
        pcollectionId,
        pcollection.getWindowingStrategyId());
  }

  // Windowing strategies: the window coder must exist.
  for (Map.Entry<String, WindowingStrategy> entry :
      components.getWindowingStrategiesMap().entrySet()) {
    String windowCoderId = entry.getValue().getWindowCoderId();
    checkArgument(
        components.containsCoders(windowCoderId),
        "%s: WindowingStrategy %s uses unknown coder %s",
        context,
        entry.getKey(),
        windowCoderId);
  }

  // Coders: every component coder referenced by a coder must itself be present.
  components
      .getCodersMap()
      .forEach(
          (coderId, coder) -> {
            for (String componentCoderId : coder.getComponentCoderIdsList()) {
              checkArgument(
                  components.containsCoders(componentCoderId),
                  "%s: Coder %s uses unknown component coder %s",
                  context,
                  coderId,
                  componentCoderId);
            }
          });
}
Aggregations