use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class FnHarness method main.
/**
 * Starts the SDK Fn harness from configuration obtained through {@code environmentVarGetter}.
 *
 * <p>Every setting is looked up exactly once, echoed to standard output, parsed, and finally
 * passed to the overloaded {@code main}. Looking each value up a single time guarantees that the
 * value that is logged is the value that is actually used.
 *
 * <p>The status location and the runner capabilities are optional: a {@code null} status location
 * results in a {@code null} status descriptor, and {@code null} or blank runner capabilities
 * result in an empty capability set. Runner capabilities are otherwise split on whitespace.
 *
 * @param environmentVarGetter maps a configuration variable name to its value
 * @throws Exception if start-up, option parsing, or the delegated {@code main} fails
 */
@VisibleForTesting
public static void main(Function<String, String> environmentVarGetter) throws Exception {
  JvmInitializers.runOnStartup();
  System.out.format("SDK Fn Harness started%n");

  // Read each variable once so the logged value cannot differ from the one used below.
  String id = environmentVarGetter.apply(HARNESS_ID);
  String loggingLocation = environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR);
  String controlLocation = environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR);
  String statusLocation = environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR);
  String pipelineOptionsJson = environmentVarGetter.apply(PIPELINE_OPTIONS);
  String runnerCapabilitiesOrNull = environmentVarGetter.apply(RUNNER_CAPABILITIES);

  System.out.format("Harness ID %s%n", id);
  System.out.format("Logging location %s%n", loggingLocation);
  System.out.format("Control location %s%n", controlLocation);
  System.out.format("Status location %s%n", statusLocation);
  System.out.format("Pipeline options %s%n", pipelineOptionsJson);

  PipelineOptions options = PipelineOptionsTranslation.fromJson(pipelineOptionsJson);
  Endpoints.ApiServiceDescriptor loggingApiServiceDescriptor =
      getApiServiceDescriptor(loggingLocation);
  Endpoints.ApiServiceDescriptor controlApiServiceDescriptor =
      getApiServiceDescriptor(controlLocation);
  // The status endpoint is optional; only parse it when it was supplied.
  Endpoints.ApiServiceDescriptor statusApiServiceDescriptor =
      statusLocation == null ? null : getApiServiceDescriptor(statusLocation);

  // Trim before splitting: String.split keeps a leading empty token, so a blank value or one
  // with leading whitespace would otherwise register "" as a capability.
  String trimmedCapabilities =
      runnerCapabilitiesOrNull == null ? "" : runnerCapabilitiesOrNull.trim();
  Set<String> runnerCapabilities =
      trimmedCapabilities.isEmpty()
          ? Collections.emptySet()
          : ImmutableSet.copyOf(trimmedCapabilities.split("\\s+"));

  main(
      id,
      options,
      runnerCapabilities,
      loggingApiServiceDescriptor,
      controlApiServiceDescriptor,
      statusApiServiceDescriptor);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class PubSubWritePayloadTranslationTest method testTranslateSinkWithTopicOverridden.
/**
 * Checks the translation of a Pubsub sink whose topic comes from a runtime value provider: the
 * payload must carry the provider's property name as the runtime override, leave the static
 * topic empty, and keep the timestamp and id attributes.
 */
@Test
public void testTranslateSinkWithTopicOverridden() throws Exception {
  // Topic supplied through a provider created by the test pipeline rather than a static value.
  ValueProvider<TopicPath> runtimeProvider = pipeline.newProvider(TOPIC);
  PubsubSink sink =
      new PubsubSink(
          new PubsubUnboundedSink(
              null,
              runtimeProvider,
              TIMESTAMP_ATTRIBUTE,
              ID_ATTRIBUTE,
              0,
              0,
              0,
              Duration.ZERO,
              null));

  // Apply the sink so an AppliedPTransform can be handed to the translator.
  PCollection<byte[]> elements = pipeline.apply(Create.of(new byte[0]));
  PDone done = elements.apply(sink);
  AppliedPTransform<?, ?, PubsubSink> applied =
      AppliedPTransform.of(
          "sink",
          PValues.expandInput(elements),
          PValues.expandOutput(done),
          sink,
          ResourceHints.create(),
          pipeline);

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.FunctionSpec translated = sinkTranslator.translate(applied, sdkComponents);
  assertEquals(PTransformTranslation.PUBSUB_WRITE, translated.getUrn());

  PubSubWritePayload writePayload = PubSubWritePayload.parseFrom(translated.getPayload());
  String expectedOverride = ((NestedValueProvider) runtimeProvider).propertyName();
  assertEquals(expectedOverride, writePayload.getTopicRuntimeOverridden());
  assertTrue(writePayload.getTopic().isEmpty());
  assertEquals(TIMESTAMP_ATTRIBUTE, writePayload.getTimestampAttribute());
  assertEquals(ID_ATTRIBUTE, writePayload.getIdAttribute());
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class PubSubWritePayloadTranslationTest method testTranslateSinkWithTopic.
/**
 * Checks the translation of a Pubsub sink with a statically known topic: the payload must carry
 * the topic's full path, leave the runtime override empty, and keep the timestamp and id
 * attributes.
 */
@Test
public void testTranslateSinkWithTopic() throws Exception {
  // Topic is fixed up front via a static value provider.
  PubsubUnboundedSink unboundedSink =
      new PubsubUnboundedSink(
          null,
          StaticValueProvider.of(TOPIC),
          TIMESTAMP_ATTRIBUTE,
          ID_ATTRIBUTE,
          0,
          0,
          0,
          Duration.ZERO,
          null);
  PubsubSink sink = new PubsubSink(unboundedSink);

  // Apply the sink so an AppliedPTransform can be handed to the translator.
  PCollection<byte[]> elements = pipeline.apply(Create.of(new byte[0]));
  PDone done = elements.apply(sink);
  AppliedPTransform<?, ?, PubsubSink> applied =
      AppliedPTransform.of(
          "sink",
          PValues.expandInput(elements),
          PValues.expandOutput(done),
          sink,
          ResourceHints.create(),
          pipeline);

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.FunctionSpec translated = sinkTranslator.translate(applied, sdkComponents);
  assertEquals(PTransformTranslation.PUBSUB_WRITE, translated.getUrn());

  PubSubWritePayload writePayload = PubSubWritePayload.parseFrom(translated.getPayload());
  assertEquals(TOPIC.getFullPath(), writePayload.getTopic());
  assertTrue(writePayload.getTopicRuntimeOverridden().isEmpty());
  assertEquals(TIMESTAMP_ATTRIBUTE, writePayload.getTimestampAttribute());
  assertEquals(ID_ATTRIBUTE, writePayload.getIdAttribute());
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class PubsubIOExternalTest method testConstructPubsubWrite.
/**
 * Expands an external Pubsub write through an {@code ExpansionService} and checks the result:
 * the expanded transform must contain the expected sub-transforms, take one input and produce no
 * outputs, and the innermost writer {@code DoFn} must hold the configured topic and id attribute.
 */
@Test
public void testConstructPubsubWrite() throws Exception {
  String topic = "projects/project-1234/topics/topic_name";
  String idAttribute = "id_foo";

  // Configuration row describing the external write.
  Schema configSchema =
      Schema.of(Field.of("topic", FieldType.STRING), Field.of("id_label", FieldType.STRING));
  Row configRow =
      Row.withSchema(configSchema)
          .withFieldValue("topic", topic)
          .withFieldValue("id_label", idAttribute)
          .build();
  ExternalTransforms.ExternalConfigurationPayload payload = encodeRow(configRow);

  // Requirements are not passed as part of the expansion service, so validation fails because
  // of how the pipeline used to expand the transform is constructed: it now contains a
  // transform with a requirement.
  Pipeline p = Pipeline.create();
  p.apply("unbounded", Impulse.create()).setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  RunnerApi.Components upstreamComponents = pipelineProto.getComponents();

  // The single output of the last upstream transform becomes the input of the expanded write.
  RunnerApi.PTransform lastUpstream =
      Iterables.getLast(upstreamComponents.getTransformsMap().values());
  String inputPCollection = Iterables.getOnlyElement(lastUpstream.getOutputsMap().values());

  RunnerApi.FunctionSpec writeSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(ExternalWrite.URN)
          .setPayload(payload.toByteString())
          .build();
  RunnerApi.PTransform toExpand =
      RunnerApi.PTransform.newBuilder()
          .setUniqueName("test")
          .putInputs("input", inputPCollection)
          .setSpec(writeSpec)
          .build();
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(upstreamComponents)
          .setTransform(toExpand)
          .setNamespace("test_namespace")
          .build();

  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);

  ExpansionApi.ExpansionResponse result = observer.result;
  RunnerApi.PTransform transform = result.getTransform();
  assertThat(
      transform.getSubtransformsList(),
      Matchers.hasItem(MatchesPattern.matchesPattern(".*MapElements.*")));
  assertThat(
      transform.getSubtransformsList(),
      Matchers.hasItem(MatchesPattern.matchesPattern(".*PubsubUnboundedSink.*")));
  assertThat(transform.getInputsCount(), Matchers.is(1));
  assertThat(transform.getOutputsCount(), Matchers.is(0));

  // Walk down the expanded transform tree to the writer ParDo.
  RunnerApi.Components expandedComponents = result.getComponents();
  // test_namespacetest/PubsubUnboundedSink
  RunnerApi.PTransform sinkComposite =
      expandedComponents.getTransformsOrThrow(transform.getSubtransforms(1));
  // test_namespacetest/PubsubUnboundedSink/PubsubSink
  RunnerApi.PTransform pubsubSinkComposite =
      expandedComponents.getTransformsOrThrow(sinkComposite.getSubtransforms(1));
  // test_namespacetest/PubsubUnboundedSink/PubsubSink/PubsubUnboundedSink.Writer
  RunnerApi.PTransform writerComposite =
      expandedComponents.getTransformsOrThrow(pubsubSinkComposite.getSubtransforms(3));
  // test_namespacetest/PubsubUnboundedSink/PubsubSink/PubsubUnboundedSink.Writer/ParMultiDo(Writer)
  RunnerApi.PTransform writeParDo =
      expandedComponents.getTransformsOrThrow(writerComposite.getSubtransforms(0));

  RunnerApi.ParDoPayload parDoPayload =
      RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
  DoFn<?, ?> pubsubWriter = ParDoTranslation.getDoFn(parDoPayload);

  // Inspect the writer's private fields to confirm the configuration reached it.
  String idAttributeActual = (String) Whitebox.getInternalState(pubsubWriter, "idAttribute");
  ValueProvider<PubsubClient.TopicPath> topicActual =
      (ValueProvider<PubsubClient.TopicPath>) Whitebox.getInternalState(pubsubWriter, "topic");
  String topicActualText = topicActual == null ? null : String.valueOf(topicActual);

  assertThat(topicActualText, Matchers.is(topic));
  assertThat(idAttributeActual, Matchers.is(idAttribute));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class GreedyPCollectionFusers method canFuseParDo.
/**
 * Decides whether a ParDo may be fused into a stage that consumes {@code candidate}.
 *
 * <p>Fusion is refused when the ParDo's environment differs from {@code environment}. It is
 * allowed when {@code candidate} feeds one of the ParDo's timer-family inputs. Otherwise it is
 * refused when the ParDo declares state or timer families, or when it consumes any side input;
 * in every remaining case it is allowed.
 *
 * <p>A ParDo that consumes a side input cannot process an element until all of the side inputs
 * contain data for the side input window that contains the element.
 *
 * <p>{@code stagePCollections} is accepted but not consulted by this method.
 *
 * @throws IllegalArgumentException if the ParDo has no environment or its payload cannot be
 *     parsed
 */
private static boolean canFuseParDo(PTransformNode parDo, Environment environment, PCollectionNode candidate, Collection<PCollectionNode> stagePCollections, QueryablePipeline pipeline) {
  Optional<Environment> env = pipeline.getEnvironment(parDo);
  checkArgument(
      env.isPresent(),
      "A %s must have an %s associated with it",
      ParDoPayload.class.getSimpleName(),
      Environment.class.getSimpleName());

  boolean sameEnvironment = env.get().equals(environment);
  if (!sameEnvironment) {
    // The ParDo runs in a different environment than the stage, so fusion is never possible.
    return false;
  }

  try {
    ParDoPayload payload = ParDoPayload.parseFrom(parDo.getTransform().getSpec().getPayload());

    // Ids of the PCollections wired to inputs whose local name is a timer family.
    Collection<String> timerInputIds =
        Maps.filterKeys(
                parDo.getTransform().getInputsMap(),
                localName -> payload.getTimerFamilySpecsMap().containsKey(localName))
            .values();
    if (timerInputIds.contains(candidate.getId())) {
      // Allow fusion across timer PCollections because they are a self loop.
      return true;
    }

    boolean usesStateOrTimers =
        payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0;
    if (usesStateOrTimers) {
      // Rather than checking whether the rest of the stage is key-partitioned and preserves
      // keys, these ParDos do not fuse into an existing stage.
      return false;
    }

    // A ParDo which consumes side inputs is not placed in an executable stage alongside any
    // transforms which are upstream of any of its side inputs; any side input blocks fusion.
    return pipeline.getSideInputs(parDo).isEmpty();
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalArgumentException(e);
  }
}
Aggregations