Search in sources:

Example 51 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class LengthPrefixUnknownCodersTest method test.

/**
 * Registers the {@code original} coder in a fresh {@link SdkComponents}, runs
 * {@code LengthPrefixUnknownCoders.addLengthPrefixedCoder} over the resulting components, and
 * checks that the coder rehydrated under the returned id equals {@code expected}.
 */
@Test
public void test() throws IOException {
    SdkComponents sdk = SdkComponents.create();
    // An environment is registered before the coder is added.
    sdk.registerEnvironment(Environments.createDockerEnvironment("java"));
    String originalId = sdk.registerCoder(original);

    // The rewrite operates on a mutable builder view of the registered components.
    Components.Builder builder = sdk.toComponents().toBuilder();
    String rewrittenId =
        LengthPrefixUnknownCoders.addLengthPrefixedCoder(originalId, builder, replaceWithByteArray);

    Components rewritten = builder.build();
    assertEquals(expected, RehydratedComponents.forComponents(rewritten).getCoder(rewrittenId));
}
Also used : SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Test(org.junit.Test)

Example 52 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class PipelineValidator method validateExecutableStage.

/**
 * Validates a transform whose payload is an {@link ExecutableStagePayload}.
 *
 * <p>Checks, in order, that the payload's input is one of the transform's inputs, that the
 * payload lists at least one transform, that every listed transform and output id is present
 * in the payload's own components, and finally delegates to {@code validateComponents}.
 *
 * @param id id of the transform being validated; used in error messages
 * @param transform the transform whose spec payload is parsed as an ExecutableStagePayload
 * @param outerComponents components of the enclosing scope (not consulted by this method)
 * @param requirements requirements forwarded to {@code validateComponents}
 * @throws Exception if the payload cannot be parsed or nested validation fails
 */
private static void validateExecutableStage(String id, PTransform transform, Components outerComponents, Set<String> requirements) throws Exception {
    ExecutableStagePayload stagePayload =
        ExecutableStagePayload.parseFrom(transform.getSpec().getPayload());
    // Lookups below go through the stage's own components, not outerComponents.
    Components stageComponents = stagePayload.getComponents();

    boolean inputIsKnown = transform.getInputsMap().containsValue(stagePayload.getInput());
    checkArgument(inputIsKnown, "ExecutableStage %s uses unknown input %s", id, stagePayload.getInput());

    boolean hasTransforms = !stagePayload.getTransformsList().isEmpty();
    checkArgument(hasTransforms, "ExecutableStage %s contains no transforms", id);

    stagePayload
        .getTransformsList()
        .forEach(
            memberId ->
                checkArgument(
                    stageComponents.containsTransforms(memberId),
                    "ExecutableStage %s uses unknown transform %s",
                    id,
                    memberId));

    stagePayload
        .getOutputsList()
        .forEach(
            producedId ->
                checkArgument(
                    stageComponents.containsPcollections(producedId),
                    "ExecutableStage %s uses unknown output %s",
                    id,
                    producedId));

    String context = "ExecutableStage " + id;
    validateComponents(context, stageComponents, requirements);
    // TODO: Also validate that side inputs of all transforms within components.getTransforms()
    // are contained within payload.getSideInputsList()
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload)

Example 53 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class PipelineValidator method validate.

/**
 * Validates a pipeline proto.
 *
 * <p>Every root transform id must be present in the pipeline's components; the components are
 * then passed to {@code validateComponents} together with the pipeline's requirements.
 *
 * @param p the pipeline to validate
 */
public static void validate(RunnerApi.Pipeline p) {
    Components pipelineComponents = p.getComponents();

    // Root ids are checked in list order; the first unknown id fails the check.
    p.getRootTransformIdsList()
        .forEach(
            rootId ->
                checkArgument(
                    pipelineComponents.containsTransforms(rootId),
                    "Root transform id %s is unknown",
                    rootId));

    ImmutableSet<String> declaredRequirements = ImmutableSet.copyOf(p.getRequirementsList());
    validateComponents("pipeline", pipelineComponents, declaredRequirements);
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components)

Example 54 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class Environments method getArtifacts.

/**
 * Builds one {@link ArtifactInformation} per distinct, existing entry of {@code stagingFiles}.
 *
 * <p>An entry is either a plain path or has the form {@code stagedName=path} (split on the
 * first {@code '='}). Entries whose file does not exist are skipped. Directories are zipped via
 * {@code zipDirectory} and the zip is what gets hashed and referenced; regular files are hashed
 * and referenced directly. When no staged name is given, one is derived with
 * {@code createStagingFileName} from the original file and its SHA-256 hash.
 *
 * @param stagingFiles paths (optionally prefixed with {@code name=}) to turn into artifacts
 * @return the artifacts, in first-occurrence order of the input entries
 * @throws RuntimeException wrapping any {@link IOException} raised while zipping or hashing
 */
public static List<ArtifactInformation> getArtifacts(List<String> stagingFiles) {
    ImmutableList.Builder<ArtifactInformation> collected = ImmutableList.builder();
    // LinkedHashSet removes repeated entries while keeping the original ordering.
    Set<String> uniqueEntries = new LinkedHashSet<>(stagingFiles);
    for (String entry : uniqueEntries) {
        String stagedName = null;
        String location = entry;
        if (entry.contains("=")) {
            String[] nameAndPath = entry.split("=", 2);
            stagedName = nameAndPath[0];
            location = nameAndPath[1];
        }
        File file = new File(location);

        // Spurious items get added to the classpath. Filter by just those that exist.
        if (!file.exists()) {
            continue;
        }

        ArtifactInformation.Builder artifact =
            ArtifactInformation.newBuilder()
                .setTypeUrn(BeamUrns.getUrn(StandardArtifacts.Types.FILE))
                .setRoleUrn(BeamUrns.getUrn(StandardArtifacts.Roles.STAGING_TO));

        // A directory is staged as its zipped form; a regular file is staged as-is.
        File payloadFile;
        HashCode digest;
        try {
            payloadFile = file.isDirectory() ? zipDirectory(file) : file;
            digest = Files.asByteSource(payloadFile).hash(Hashing.sha256());
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        artifact.setTypePayload(
            RunnerApi.ArtifactFilePayload.newBuilder()
                .setPath(payloadFile.getPath())
                .setSha256(digest.toString())
                .build()
                .toByteString());

        // The generated name is based on the original file, not on the zipped payload.
        String effectiveName =
            (stagedName != null) ? stagedName : createStagingFileName(file, digest);
        artifact.setRolePayload(
            RunnerApi.ArtifactStagingToRolePayload.newBuilder()
                .setStagedName(effectiveName)
                .build()
                .toByteString());

        collected.add(artifact.build());
    }
    return collected.build();
}
Also used : LinkedHashSet(java.util.LinkedHashSet) HashCode(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.HashCode) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) IOException(java.io.IOException) ArtifactInformation(org.apache.beam.model.pipeline.v1.RunnerApi.ArtifactInformation) File(java.io.File)

Example 55 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class GreedyPipelineFuser method sanitizeDanglingPTransformInputs.

/**
 * Returns a copy of {@code stage} in which PTransform inputs that cannot be produced or
 * supplied within the stage ("dangling" inputs) have been removed.
 *
 * <p>An input PCollection id is considered legitimate when it is one of:
 *
 * <ul>
 *   <li>the stage's explicit input PCollection
 *   <li>an explicit output PCollection of the stage
 *   <li>the PCollection behind one of the stage's side inputs
 *   <li>an output of a PTransform within the same stage
 * </ul>
 *
 * <p>Any other input id is dropped from the transforms that reference it, and a PCollection
 * with that id is also dropped from the stage's components.
 *
 * @param stage the stage to sanitize
 * @return a new stage with rewritten transforms and components; other fields are carried over
 */
private static ExecutableStage sanitizeDanglingPTransformInputs(ExecutableStage stage) {
    // NOTE(review): timer PCollections are not added to this set here; confirm whether they
    // need to be.
    Set<String> reachable = new HashSet<>();
    reachable.add(stage.getInputPCollection().getId());
    for (PCollectionNode output : stage.getOutputPCollections()) {
        reachable.add(output.getId());
    }
    stage.getSideInputs().forEach(sideInput -> reachable.add(sideInput.collection().getId()));
    for (PTransformNode node : stage.getTransforms()) {
        reachable.addAll(node.getTransform().getOutputsMap().values());
    }

    // Every consumed PCollection id that is not reachable is dangling.
    Set<String> dangling = new HashSet<>();
    for (PTransformNode node : stage.getTransforms()) {
        for (String consumedId : node.getTransform().getInputsMap().values()) {
            if (!reachable.contains(consumedId)) {
                dangling.add(consumedId);
            }
        }
    }

    ImmutableList.Builder<PTransformNode> rebuilt = ImmutableList.builder();
    for (PTransformNode node : stage.getTransforms()) {
        PTransform transform = node.getTransform();
        Map<String, String> declaredInputs = transform.getInputsMap();
        Map<String, String> keptInputs =
            declaredInputs.entrySet().stream()
                .filter(input -> !dangling.contains(input.getValue()))
                .collect(Collectors.toMap(Entry::getKey, Entry::getValue));
        if (keptInputs.equals(declaredInputs)) {
            // Nothing was filtered out, so the node is reused as-is.
            rebuilt.add(node);
        } else {
            // Dangling inputs found so recreate pTransform without the dangling inputs.
            PTransform trimmed = transform.toBuilder().clearInputs().putAllInputs(keptInputs).build();
            rebuilt.add(PipelineNode.pTransform(node.getId(), trimmed));
        }
    }
    ImmutableList<PTransformNode> sanitizedNodes = rebuilt.build();

    Map<String, PTransform> transformsById =
        sanitizedNodes.stream()
            .collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform));
    Map<String, PCollection> keptPCollections =
        stage.getComponents().getPcollectionsMap().entrySet().stream()
            .filter(pc -> !dangling.contains(pc.getKey()))
            .collect(Collectors.toMap(Entry::getKey, Entry::getValue));

    // Replace both the transforms and the PCollections held by the stage's components.
    Components sanitizedComponents =
        stage.getComponents().toBuilder()
            .clearTransforms()
            .putAllTransforms(transformsById)
            .clearPcollections()
            .putAllPcollections(keptPCollections)
            .build();

    return ImmutableExecutableStage.of(
        sanitizedComponents,
        stage.getEnvironment(),
        stage.getInputPCollection(),
        stage.getSideInputs(),
        stage.getUserStates(),
        stage.getTimers(),
        sanitizedNodes,
        stage.getOutputPCollections(),
        stage.getWireCoderSettings());
}
Also used : PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) HashMultimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.HashMultimap) TreeSet(java.util.TreeSet) ComparisonChain(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ComparisonChain) HashSet(java.util.HashSet) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) Sets(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) Multimap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Multimap) Map(java.util.Map) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) LinkedHashSet(java.util.LinkedHashSet) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) Logger(org.slf4j.Logger) Collection(java.util.Collection) Set(java.util.Set) NavigableSet(java.util.NavigableSet) DeduplicationResult(org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult) Collectors(java.util.stream.Collectors) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) AutoValue(com.google.auto.value.AutoValue) Entry(java.util.Map.Entry) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Queue(java.util.Queue) ArrayDeque(java.util.ArrayDeque) Comparator(java.util.Comparator) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) 
ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Aggregations

Test (org.junit.Test)55 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)49 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)40 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)31 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)20 Map (java.util.Map)16 WindowedValue (org.apache.beam.sdk.util.WindowedValue)16 IOException (java.io.IOException)15 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)15 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)14 Coder (org.apache.beam.sdk.coders.Coder)14 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 Pipeline (org.apache.beam.sdk.Pipeline)13 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)12 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)11 KvCoder (org.apache.beam.sdk.coders.KvCoder)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)10