Search in sources:

Example 61 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class ExecutableStageTest method testRoundTripToFromTransform.

/**
 * Round-trips an executable stage through its PTransform representation.
 *
 * <p>The stage wraps a single ParDo transform that declares a main input, a side input, one user
 * state spec and one timer family. The test converts the stage to a PTransform, checks the
 * inputs and outputs exposed on that PTransform, and then verifies that decoding the payload
 * yields a stage equal to the original one.
 */
@Test
public void testRoundTripToFromTransform() throws Exception {
    Environment dockerEnv =
            org.apache.beam.runners.core.construction.Environments.createDockerEnvironment("foo");

    // Payload of the ParDo: one side input, one user state cell and one timer family.
    ParDoPayload parDoPayload =
            ParDoPayload.newBuilder()
                    .setDoFn(FunctionSpec.newBuilder())
                    .putSideInputs("side_input", SideInput.getDefaultInstance())
                    .putStateSpecs("user_state", StateSpec.getDefaultInstance())
                    .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
                    .build();
    FunctionSpec.Builder parDoSpec =
            FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(parDoPayload.toByteString());
    PTransform parDo =
            PTransform.newBuilder()
                    .putInputs("input", "input.out")
                    .putInputs("side_input", "sideInput.in")
                    .putInputs("timer", "timer.out")
                    .putOutputs("output", "output.out")
                    .putOutputs("timer", "timer.out")
                    .setSpec(parDoSpec)
                    .setEnvironmentId("foo")
                    .build();

    // One PCollection per id referenced by the transform above.
    PCollection mainInputPc = PCollection.newBuilder().setUniqueName("input.out").build();
    PCollection sideInputPc = PCollection.newBuilder().setUniqueName("sideInput.in").build();
    PCollection timerPc = PCollection.newBuilder().setUniqueName("timer.out").build();
    PCollection outputPc = PCollection.newBuilder().setUniqueName("output.out").build();

    Components stageComponents =
            Components.newBuilder()
                    .putTransforms("pt", parDo)
                    .putPcollections("input.out", mainInputPc)
                    .putPcollections("sideInput.in", sideInputPc)
                    .putPcollections("timer.out", timerPc)
                    .putPcollections("output.out", outputPc)
                    .putEnvironments("foo", dockerEnv)
                    .build();

    PTransformNode parDoNode = PipelineNode.pTransform("pt", parDo);
    SideInputReference sideInputReference =
            SideInputReference.of(
                    parDoNode, "side_input", PipelineNode.pCollection("sideInput.in", sideInputPc));
    UserStateReference userStateReference =
            UserStateReference.of(
                    parDoNode, "user_state", PipelineNode.pCollection("input.out", mainInputPc));
    TimerReference timerReference = TimerReference.of(parDoNode, "timer");

    ImmutableExecutableStage original =
            ImmutableExecutableStage.of(
                    stageComponents,
                    dockerEnv,
                    PipelineNode.pCollection("input.out", mainInputPc),
                    Collections.singleton(sideInputReference),
                    Collections.singleton(userStateReference),
                    Collections.singleton(timerReference),
                    Collections.singleton(PipelineNode.pTransform("pt", parDo)),
                    Collections.singleton(PipelineNode.pCollection("output.out", outputPc)),
                    DEFAULT_WIRE_CODER_SETTINGS);

    PTransform asTransform = original.toPTransform("foo");

    // Exactly one output and exactly two inputs (main input and side input) are expected.
    assertThat(asTransform.getOutputsMap(), hasValue("output.out"));
    assertThat(asTransform.getOutputsCount(), equalTo(1));
    assertThat(
            asTransform.getInputsMap(), allOf(hasValue("input.out"), hasValue("sideInput.in")));
    assertThat(asTransform.getInputsCount(), equalTo(2));

    // Decoding the payload must give back the single transform and an equal stage.
    ExecutableStagePayload decoded =
            ExecutableStagePayload.parseFrom(asTransform.getSpec().getPayload());
    assertThat(decoded.getTransformsList(), contains("pt"));
    assertThat(ExecutableStage.fromPayload(decoded), equalTo(original));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 62 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class FusedPipelineTest method assertRootsInTopologicalOrder.

/**
 * Asserts that the root transforms of {@code fusedProto} are listed in topological order.
 *
 * <p>The root transform ids are visited in order. For each one, every PCollection it takes as
 * input must already have been output by a previously visited root transform, and none of its
 * outputs may be a PCollection that a previously visited root transform took as input.
 *
 * @param fusedProto the pipeline whose root transform ordering is checked
 */
private void assertRootsInTopologicalOrder(RunnerApi.Pipeline fusedProto) {
    Set<String> alreadyConsumed = new HashSet<>();
    Set<String> alreadyProduced = new HashSet<>();
    String pcTypeName = PCollection.class.getSimpleName();
    int rootCount = fusedProto.getRootTransformIdsCount();

    for (int idx = 0; idx < rootCount; idx++) {
        String rootId = fusedProto.getRootTransformIds(idx);
        PTransform root = fusedProto.getComponents().getTransformsOrThrow(rootId);

        // Every input of this root must have been produced by an earlier root.
        String[] requiredInputs = root.getInputsMap().values().toArray(new String[0]);
        assertThat(
                String.format("All %s consumed by %s must be produced before it", pcTypeName, rootId),
                alreadyProduced,
                hasItems(requiredInputs));

        // Nothing consumed by an earlier root may be produced only now.
        for (String earlierInput : alreadyConsumed) {
            assertThat(
                    String.format(
                            "%s %s was consumed before all of its producers produced it",
                            pcTypeName, earlierInput),
                    root.getOutputsMap().values(),
                    not(hasItem(earlierInput)));
        }

        alreadyConsumed.addAll(root.getInputsMap().values());
        alreadyProduced.addAll(root.getOutputsMap().values());
    }
}
Also used : HashSet(java.util.HashSet) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 63 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class TestStreamTranslationTest method testRegistrarEncodedProto.

/**
 * Translates the applied {@code testStream} transform to its proto form and checks both the URN
 * of the resulting spec and the encoded payload.
 */
@Test
public void testRegistrarEncodedProto() throws Exception {
    PCollection<String> streamOutput = p.apply(testStream);
    AppliedPTransform<PBegin, PCollection<String>, TestStream<String>> application =
            AppliedPTransform.of(
                    "fakeName",
                    PValues.expandInput(PBegin.in(p)),
                    PValues.expandOutput(streamOutput),
                    testStream,
                    ResourceHints.create(),
                    p);

    // An environment is registered on the components before the transform is translated.
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

    RunnerApi.FunctionSpec translatedSpec =
            PTransformTranslation.toProto(application, sdkComponents).getSpec();
    assertThat(translatedSpec.getUrn(), equalTo(TEST_STREAM_TRANSFORM_URN));

    RunnerApi.TestStreamPayload decodedPayload =
            TestStreamPayload.parseFrom(translatedSpec.getPayload());
    RehydratedComponents rehydrated =
            RehydratedComponents.forComponents(sdkComponents.toComponents());
    verifyTestStreamEncoding(testStream, decodedPayload, rehydrated);
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) TestStreamPayload(org.apache.beam.model.pipeline.v1.RunnerApi.TestStreamPayload) PBegin(org.apache.beam.sdk.values.PBegin) TestStream(org.apache.beam.sdk.testing.TestStream) Test(org.junit.Test)

Example 64 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class PTransformTranslationTest method toAndFromProto.

/**
 * Converts {@code spec} to its proto form and checks that the proto agrees with the original
 * transform on input count, output count, child count and name, and that every input and output
 * PCollection of the transform can be looked up in the converted components.
 */
@Test
public void toAndFromProto() throws IOException {
    SdkComponents sdkComponents =
            SdkComponents.create(spec.getTransform().getPipeline().getOptions());
    RunnerApi.PTransform proto = convert(spec, sdkComponents);
    Components convertedComponents = sdkComponents.toComponents();

    // The proto must mirror the shape and name of the source transform.
    assertThat(proto.getInputsCount(), equalTo(spec.getTransform().getInputs().size()));
    assertThat(proto.getOutputsCount(), equalTo(spec.getTransform().getOutputs().size()));
    assertThat(proto.getSubtransformsCount(), equalTo(spec.getChildren().size()));
    assertThat(proto.getUniqueName(), equalTo(spec.getTransform().getFullName()));

    // getPcollectionsOrThrow fails the test if an input's id is absent from the components.
    for (PValue in : spec.getTransform().getInputs().values()) {
        String inputId = sdkComponents.registerPCollection((PCollection<?>) in);
        convertedComponents.getPcollectionsOrThrow(inputId);
    }

    // Same lookup for every output of the transform.
    for (PValue out : spec.getTransform().getOutputs().values()) {
        String outputId = sdkComponents.registerPCollection((PCollection<?>) out);
        convertedComponents.getPcollectionsOrThrow(outputId);
    }
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PCollection(org.apache.beam.sdk.values.PCollection) PValue(org.apache.beam.sdk.values.PValue) Test(org.junit.Test)

Example 65 with PCollection

use of org.apache.beam.sdk.values.PCollection in project beam by apache.

the class PipelineTranslationTest method testResourceHints.

/**
 * Applies the same ParDo three times with different minimum-RAM resource hints and checks that
 * the hints show up on the environments of the translated pipeline, and that the two
 * applications with identical hints end up with the same environment id.
 */
@Test
public void testResourceHints() {
    Pipeline pipeline = Pipeline.create();
    PCollection<byte[]> impulse = pipeline.apply(Impulse.create());
    ParDo.SingleOutput<byte[], byte[]> identity = ParDo.of(new IdentityDoFn<byte[]>());

    impulse.apply("Big", identity.setResourceHints(ResourceHints.create().withMinRam("640KB")));
    impulse.apply("Small", identity.setResourceHints(ResourceHints.create().withMinRam(1)));
    impulse.apply(
            "AnotherBig", identity.setResourceHints(ResourceHints.create().withMinRam("640KB")));

    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, false);
    String minRamUrn = "beam:resources:min_ram_bytes:v1";

    // "640KB" is expected to be encoded as the byte count 640000.
    String bigEnvId = getLeafTransform(pipelineProto, "Big").getEnvironmentId();
    assertThat(
            pipelineProto.getComponents().getEnvironmentsMap().get(bigEnvId).getResourceHintsMap(),
            org.hamcrest.Matchers.hasEntry(minRamUrn, ByteString.copyFromUtf8("640000")));

    String smallEnvId = getLeafTransform(pipelineProto, "Small").getEnvironmentId();
    assertThat(
            pipelineProto.getComponents().getEnvironmentsMap().get(smallEnvId).getResourceHintsMap(),
            org.hamcrest.Matchers.hasEntry(minRamUrn, ByteString.copyFromUtf8("1")));

    // Identical hints must map to one shared environment.
    String anotherBigEnvId = getLeafTransform(pipelineProto, "AnotherBig").getEnvironmentId();
    assertThat(bigEnvId, equalTo(anotherBigEnvId));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ParDo(org.apache.beam.sdk.transforms.ParDo) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)45 Test (org.junit.Test)45 Pipeline (org.apache.beam.sdk.Pipeline)25 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)24 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)22 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)22 Map (java.util.Map)21 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)21 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)21 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)17 ArrayList (java.util.ArrayList)16 HashMap (java.util.HashMap)14 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)13 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 PCollection (org.apache.beam.sdk.values.PCollection)12 Coder (org.apache.beam.sdk.coders.Coder)11 KV (org.apache.beam.sdk.values.KV)11 Collection (java.util.Collection)10 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)10 IOException (java.io.IOException)9