use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class PipelineValidator method validateCombine.
/**
 * Checks that the accumulator coder referenced by a Combine transform's payload is registered in
 * the supplied components.
 *
 * @param id id of the transform; used only to build the failure message
 * @param transform transform whose spec payload is parsed as a {@link CombinePayload}
 * @param components components that are searched for the accumulator coder id
 * @param requirements not read by this check
 * @throws Exception declared broadly; a failed check surfaces through {@code checkArgument}
 */
private static void validateCombine(String id, PTransform transform, Components components, Set<String> requirements) throws Exception {
  String accumulatorCoderId =
      CombinePayload.parseFrom(transform.getSpec().getPayload()).getAccumulatorCoderId();
  boolean coderIsKnown = components.containsCoders(accumulatorCoderId);
  checkArgument(coderIsKnown, "Transform %s uses unknown accumulator coder id %s", id, accumulatorCoderId);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class QueryablePipeline method buildNetwork.
/**
 * Builds a directed network whose nodes are the given transforms and the PCollections they
 * produce or consume, and whose edges run producer -> PCollection -> consumer.
 *
 * <p>Every PCollection must end up with exactly one producing transform, and every consumed
 * PCollection must be produced by one of the given transforms; otherwise {@code checkArgument}
 * fails.
 *
 * @param transformIds ids of the transforms to place in the network
 * @param components components used to look up the transforms and their output PCollections
 * @return the populated network
 */
private MutableNetwork<PipelineNode, PipelineEdge> buildNetwork(Collection<String> transformIds, Components components) {
  MutableNetwork<PipelineNode, PipelineEdge> graph =
      NetworkBuilder.directed().allowsParallelEdges(true).allowsSelfLoops(false).build();
  // PCollections that were first seen as an input and have not yet been seen as an output.
  Set<PCollectionNode> awaitingProducer = new HashSet<>();

  for (String transformId : transformIds) {
    PTransform transform = components.getTransformsOrThrow(transformId);
    // The node itself is built from the instance-level components, not the parameter.
    PTransformNode transformNode =
        PipelineNode.pTransform(transformId, this.components.getTransformsOrThrow(transformId));
    graph.addNode(transformNode);

    for (String outputId : transform.getOutputsMap().values()) {
      PCollectionNode outputNode =
          PipelineNode.pCollection(outputId, components.getPcollectionsOrThrow(outputId));
      graph.addNode(outputNode);
      graph.addEdge(transformNode, outputNode, new PerElementEdge());
      // The edge was just added, so an in-degree other than one means another producer exists.
      Set<PipelineNode> producers = graph.predecessors(outputNode);
      checkArgument(
          graph.inDegree(outputNode) == 1,
          "A %s should have exactly one producing %s, but found %s:\nPCollection:\n%s\nProducers:\n%s",
          PCollectionNode.class.getSimpleName(),
          PTransformNode.class.getSimpleName(),
          producers.size(),
          outputNode,
          producers);
      awaitingProducer.remove(outputNode);
    }

    for (Map.Entry<String, String> input : transform.getInputsMap().entrySet()) {
      // The entry key is the transform-local input name; it decides which kind of edge is added.
      String inputId = input.getValue();
      PCollectionNode inputNode =
          PipelineNode.pCollection(inputId, this.components.getPcollectionsOrThrow(inputId));
      boolean firstSighting = graph.addNode(inputNode);
      if (firstSighting) {
        // Newly added to the network, so no producer has been recorded for it so far.
        awaitingProducer.add(inputNode);
      }
      boolean isSideInput = getLocalSideInputNames(transform).contains(input.getKey());
      PipelineEdge edge = isSideInput ? new SingletonEdge() : new PerElementEdge();
      graph.addEdge(inputNode, transformNode, edge);
    }
  }

  checkArgument(
      awaitingProducer.isEmpty(),
      "%ss %s were consumed but never produced",
      PCollectionNode.class.getSimpleName(),
      awaitingProducer);
  return graph;
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class QueryablePipeline method getPrimitiveTransformIds.
/**
 * Returns the ids of the leaf transforms found beneath every transform in {@code components}
 * that {@code isPrimitiveTransform} accepts.
 *
 * <p>A transform reported as primitive can still carry sub-transforms (and deeper descendants),
 * either because a runner rewrote it in terms of runner-specific transforms or because an SDK
 * built it from other underlying transforms (see
 * https://issues.apache.org/jira/browse/BEAM-5441). The leaf descendants of such a transform are
 * treated as the true primitives. In the common case the transform has no sub-transforms and its
 * own id is the one returned.
 *
 * @param components components whose transforms map is scanned
 * @return the leaf transform ids, without duplicates, in the order they were first reached
 * @throws IllegalArgumentException if a transform lists a sub-transform id that has no entry in
 *     {@code components}
 */
@VisibleForTesting
static Collection<String> getPrimitiveTransformIds(RunnerApi.Components components) {
  Collection<String> ids = new LinkedHashSet<>();
  for (Map.Entry<String, PTransform> transformEntry : components.getTransformsMap().entrySet()) {
    if (!isPrimitiveTransform(transformEntry.getValue())) {
      continue;
    }
    // Walk down from the primitive until transforms without sub-transforms are reached.
    Deque<String> pending = new ArrayDeque<>();
    pending.push(transformEntry.getKey());
    while (!pending.isEmpty()) {
      String id = pending.pop();
      // getTransformsOrThrow names the missing id instead of handing back null, which would
      // otherwise fail on the next line with a message-less NullPointerException.
      PTransform next = components.getTransformsOrThrow(id);
      List<String> subtransforms = next.getSubtransformsList();
      if (subtransforms.isEmpty()) {
        ids.add(id);
      } else {
        pending.addAll(subtransforms);
      }
    }
  }
  return ids;
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class EnvironmentsTest method getEnvironmentPTransform.
/**
 * Checks that the environment resolved for a ParDo transform equals the environment stored in
 * the components under that transform's environment id.
 */
@Test
public void getEnvironmentPTransform() throws IOException {
  Pipeline p = Pipeline.create();
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  // A DoFn with an empty body is enough; only its translated payload is needed.
  DoFn<String, String> noOpFn =
      new DoFn<String, String>() {
        @ProcessElement
        public void process(ProcessContext context) {}
      };
  ParDoPayload payload =
      ParDoTranslation.translateParDo(
          ParDo.of(noOpFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()),
          PCollection.createPrimitiveOutputInternal(
              p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of()),
          DoFnSchemaInformation.create(),
          Pipeline.create(),
          components);
  RehydratedComponents rehydratedComponents =
      RehydratedComponents.forComponents(components.toComponents());

  FunctionSpec parDoSpec =
      FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(payload.toByteString())
          .build();
  PTransform ptransform =
      PTransform.newBuilder()
          .setSpec(parDoSpec)
          .setEnvironmentId(components.getOnlyEnvironmentId())
          .build();

  Environment actual = Environments.getEnvironment(ptransform, rehydratedComponents).get();
  Environment expected =
      components.toComponents().getEnvironmentsOrThrow(ptransform.getEnvironmentId());
  assertThat(actual, equalTo(expected));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class OutputDeduplicator method deduplicateStageOutput.
/**
 * Builds a copy of {@code stage} in which each transform has been passed through
 * {@code updateOutputs} and each output {@link PCollectionNode} that has an entry in
 * {@code originalToPartial} is swapped for the mapped partial node.
 *
 * @param stage the stage to rewrite
 * @param originalToPartial replacement nodes, keyed by the id of the original PCollection
 * @return a new stage whose components carry the updated transforms plus every partial
 *     PCollection; all other stage properties are taken over from {@code stage} as they are
 */
private static ExecutableStage deduplicateStageOutput(ExecutableStage stage, Map<String, PCollectionNode> originalToPartial) {
  Collection<PTransformNode> updatedTransforms =
      stage.getTransforms().stream()
          .map(
              node ->
                  PipelineNode.pTransform(
                      node.getId(), updateOutputs(node.getTransform(), originalToPartial)))
          .collect(Collectors.toCollection(ArrayList::new));

  // Outputs with no replacement registered are carried over untouched.
  Collection<PCollectionNode> updatedOutputs =
      stage.getOutputPCollections().stream()
          .map(output -> originalToPartial.getOrDefault(output.getId(), output))
          .collect(Collectors.toCollection(ArrayList::new));

  Map<String, PTransform> transformsById =
      updatedTransforms.stream()
          .collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform));
  Map<String, RunnerApi.PCollection> partialsById =
      originalToPartial.values().stream()
          .collect(Collectors.toMap(PCollectionNode::getId, PCollectionNode::getPCollection));

  // The old transforms are dropped wholesale; the partial PCollections are added on top of the
  // PCollections already present in the stage components.
  RunnerApi.Components updatedStageComponents =
      stage
          .getComponents()
          .toBuilder()
          .clearTransforms()
          .putAllTransforms(transformsById)
          .putAllPcollections(partialsById)
          .build();

  return ImmutableExecutableStage.of(
      updatedStageComponents,
      stage.getEnvironment(),
      stage.getInputPCollection(),
      stage.getSideInputs(),
      stage.getUserStates(),
      stage.getTimers(),
      updatedTransforms,
      updatedOutputs,
      stage.getWireCoderSettings());
}
Aggregations