use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.
the class KafkaIOExternalTest method testConstructKafkaWrite.
@Test
public void testConstructKafkaWrite() throws Exception {
  String topic = "topic";
  String keySerializer = "org.apache.kafka.common.serialization.ByteArraySerializer";
  String valueSerializer = "org.apache.kafka.common.serialization.LongSerializer";
  ImmutableMap<String, String> producerConfig =
      ImmutableMap.<String, String>builder()
          .put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:port,server2:port")
          .put("retries", "3")
          .build();
  ExternalTransforms.ExternalConfigurationPayload payload =
      encodeRow(
          Row.withSchema(
                  Schema.of(
                      Field.of("topic", FieldType.STRING),
                      Field.of("producer_config", FieldType.map(FieldType.STRING, FieldType.STRING)),
                      Field.of("key_serializer", FieldType.STRING),
                      Field.of("value_serializer", FieldType.STRING)))
              .withFieldValue("topic", topic)
              .withFieldValue("producer_config", producerConfig)
              .withFieldValue("key_serializer", keySerializer)
              .withFieldValue("value_serializer", valueSerializer)
              .build());
  Pipeline p = Pipeline.create();
  p.apply(Impulse.create()).apply(WithKeys.of("key"));
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  String inputPCollection =
      Iterables.getOnlyElement(
          Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values())
              .getOutputsMap()
              .values());
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName("test")
                  .putInputs("input", inputPCollection)
                  .setSpec(
                      RunnerApi.FunctionSpec.newBuilder()
                          .setUrn(org.apache.beam.sdk.io.kafka.KafkaIO.Write.External.URN)
                          .setPayload(payload.toByteString())))
          .setNamespace("test_namespace")
          .build();
  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);
  ExpansionApi.ExpansionResponse result = observer.result;
  RunnerApi.PTransform transform = result.getTransform();
  assertThat(
      transform.getSubtransformsList(),
      Matchers.hasItem(MatchesPattern.matchesPattern(".*Kafka-ProducerRecord.*")));
  assertThat(
      transform.getSubtransformsList(),
      Matchers.hasItem(MatchesPattern.matchesPattern(".*KafkaIO-WriteRecords.*")));
  assertThat(transform.getInputsCount(), Matchers.is(1));
  assertThat(transform.getOutputsCount(), Matchers.is(0));
  RunnerApi.PTransform writeComposite =
      result.getComponents().getTransformsOrThrow(transform.getSubtransforms(1));
  RunnerApi.PTransform writeParDo =
      result
          .getComponents()
          .getTransformsOrThrow(
              result
                  .getComponents()
                  .getTransformsOrThrow(writeComposite.getSubtransforms(0))
                  .getSubtransforms(0));
  RunnerApi.ParDoPayload parDoPayload =
      RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
  DoFn kafkaWriter = ParDoTranslation.getDoFn(parDoPayload);
  assertThat(kafkaWriter, Matchers.instanceOf(KafkaWriter.class));
  KafkaIO.WriteRecords spec =
      (KafkaIO.WriteRecords) Whitebox.getInternalState(kafkaWriter, "spec");
  assertThat(spec.getProducerConfig(), Matchers.is(producerConfig));
  assertThat(spec.getTopic(), Matchers.is(topic));
  assertThat(spec.getKeySerializer().getName(), Matchers.is(keySerializer));
  assertThat(spec.getValueSerializer().getName(), Matchers.is(valueSerializer));
}
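The test relies on an encodeRow helper that is not shown in this snippet. A minimal sketch of such a helper, assuming a RowCoder-based encoding and the SchemaTranslation utility from org.apache.beam.sdk.schemas; the actual helper in Beam's test may differ:

// Hypothetical helper: encodes a Beam Row into an ExternalConfigurationPayload by
// serializing the Row with its schema's RowCoder and attaching the schema proto.
private static ExternalTransforms.ExternalConfigurationPayload encodeRow(Row row) {
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  try {
    RowCoder.of(row.getSchema()).encode(row, outputStream);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return ExternalTransforms.ExternalConfigurationPayload.newBuilder()
      .setSchema(SchemaTranslation.schemaToProto(row.getSchema(), true))
      .setPayload(ByteString.copyFrom(outputStream.toByteArray()))
      .build();
}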
use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.
the class PubsubIOExternalTest method testConstructPubsubWrite.
@Test
public void testConstructPubsubWrite() throws Exception {
  String topic = "projects/project-1234/topics/topic_name";
  String idAttribute = "id_foo";
  ExternalTransforms.ExternalConfigurationPayload payload =
      encodeRow(
          Row.withSchema(
                  Schema.of(
                      Field.of("topic", FieldType.STRING),
                      Field.of("id_label", FieldType.STRING)))
              .withFieldValue("topic", topic)
              .withFieldValue("id_label", idAttribute)
              .build());
  // Requirements are not passed as part of the expansion request, so validation
  // would fail if the pipeline used to expand the transform contained a transform
  // with a requirement. Mark the impulse output as unbounded directly instead.
  Pipeline p = Pipeline.create();
  p.apply("unbounded", Impulse.create()).setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
  String inputPCollection =
      Iterables.getOnlyElement(
          Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values())
              .getOutputsMap()
              .values());
  ExpansionApi.ExpansionRequest request =
      ExpansionApi.ExpansionRequest.newBuilder()
          .setComponents(pipelineProto.getComponents())
          .setTransform(
              RunnerApi.PTransform.newBuilder()
                  .setUniqueName("test")
                  .putInputs("input", inputPCollection)
                  .setSpec(
                      RunnerApi.FunctionSpec.newBuilder()
                          .setUrn(ExternalWrite.URN)
                          .setPayload(payload.toByteString())))
          .setNamespace("test_namespace")
          .build();
  ExpansionService expansionService = new ExpansionService();
  TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
  expansionService.expand(request, observer);
  ExpansionApi.ExpansionResponse result = observer.result;
  RunnerApi.PTransform transform = result.getTransform();
  assertThat(
      transform.getSubtransformsList(),
      Matchers.hasItem(MatchesPattern.matchesPattern(".*MapElements.*")));
  assertThat(
      transform.getSubtransformsList(),
      Matchers.hasItem(MatchesPattern.matchesPattern(".*PubsubUnboundedSink.*")));
  assertThat(transform.getInputsCount(), Matchers.is(1));
  assertThat(transform.getOutputsCount(), Matchers.is(0));
  // test_namespacetest/PubsubUnboundedSink
  RunnerApi.PTransform writeComposite =
      result.getComponents().getTransformsOrThrow(transform.getSubtransforms(1));
  // test_namespacetest/PubsubUnboundedSink/PubsubSink
  RunnerApi.PTransform writeComposite2 =
      result.getComponents().getTransformsOrThrow(writeComposite.getSubtransforms(1));
  // test_namespacetest/PubsubUnboundedSink/PubsubSink/PubsubUnboundedSink.Writer
  RunnerApi.PTransform writeComposite3 =
      result.getComponents().getTransformsOrThrow(writeComposite2.getSubtransforms(3));
  // test_namespacetest/PubsubUnboundedSink/PubsubSink/PubsubUnboundedSink.Writer/ParMultiDo(Writer)
  RunnerApi.PTransform writeParDo =
      result.getComponents().getTransformsOrThrow(writeComposite3.getSubtransforms(0));
  RunnerApi.ParDoPayload parDoPayload =
      RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
  DoFn<?, ?> pubsubWriter = ParDoTranslation.getDoFn(parDoPayload);
  String idAttributeActual = (String) Whitebox.getInternalState(pubsubWriter, "idAttribute");
  ValueProvider<PubsubClient.TopicPath> topicActual =
      (ValueProvider<PubsubClient.TopicPath>) Whitebox.getInternalState(pubsubWriter, "topic");
  assertThat(topicActual == null ? null : String.valueOf(topicActual), Matchers.is(topic));
  assertThat(idAttributeActual, Matchers.is(idAttribute));
}
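Both tests capture the expansion response through a TestStreamObserver, which is also not shown in these snippets. A plausible minimal implementation over gRPC's StreamObserver, assuming expand produces a single response; the field name and error handling are illustrative:

// Hypothetical test double: records the single value passed to onNext so the
// test can read it synchronously after expand() returns.
private static class TestStreamObserver<T> implements StreamObserver<T> {
  private T result;

  @Override
  public void onNext(T value) {
    result = value;
  }

  @Override
  public void onError(Throwable t) {
    throw new RuntimeException("Should not happen", t);
  }

  @Override
  public void onCompleted() {}
}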
use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.
the class GreedyPCollectionFusers method canFuseParDo.
/**
 * A ParDo can be fused into a stage if it executes in the same Environment as that stage, and no
 * transforms that are upstream of any of its side inputs are present in that stage.
 *
 * <p>A ParDo that consumes a side input cannot process an element until all of the side inputs
 * contain data for the side input window that contains the element.
 */
private static boolean canFuseParDo(
    PTransformNode parDo,
    Environment environment,
    PCollectionNode candidate,
    Collection<PCollectionNode> stagePCollections,
    QueryablePipeline pipeline) {
  Optional<Environment> env = pipeline.getEnvironment(parDo);
  checkArgument(
      env.isPresent(),
      "A %s must have an %s associated with it",
      ParDoPayload.class.getSimpleName(),
      Environment.class.getSimpleName());
  if (!env.get().equals(environment)) {
    // The ParDo executes in a different environment than the stage, so fusion
    // is never possible.
    return false;
  }
  try {
    ParDoPayload payload = ParDoPayload.parseFrom(parDo.getTransform().getSpec().getPayload());
    if (Maps.filterKeys(
            parDo.getTransform().getInputsMap(),
            s -> payload.getTimerFamilySpecsMap().containsKey(s))
        .values()
        .contains(candidate.getId())) {
      // Allow fusion across timer PCollections because they are a self loop.
      return true;
    } else if (payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0) {
      // A ParDo with state or timers must be the root of a stage that is
      // key-partitioned and preserves keys; these ParDos do not fuse into an existing stage.
      return false;
    } else if (!pipeline.getSideInputs(parDo).isEmpty()) {
      // A transform that consumes side inputs cannot be fused into an
      // executable stage alongside any transforms which are upstream of any of its side inputs.
      return false;
    }
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalArgumentException(e);
  }
  return true;
}
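The timer self-loop check above can be exercised in isolation with hand-built protos. A minimal sketch; the PCollection ids and input names are made up for illustration:

// Hypothetical protos: a ParDo with a main input and a timer family "ts" whose
// input PCollection loops back to the transform itself.
RunnerApi.ParDoPayload payload =
    RunnerApi.ParDoPayload.newBuilder()
        .putTimerFamilySpecs("ts", RunnerApi.TimerFamilySpec.getDefaultInstance())
        .build();
RunnerApi.PTransform parDo =
    RunnerApi.PTransform.newBuilder()
        .putInputs("main", "pc_main")
        .putInputs("ts", "pc_timer")
        .setSpec(
            RunnerApi.FunctionSpec.newBuilder()
                .setUrn("beam:transform:pardo:v1")
                .setPayload(payload.toByteString()))
        .build();
// Same filter as in canFuseParDo: keep only the inputs that name a timer family,
// then ask whether the candidate PCollection ("pc_timer") is among them.
boolean timerSelfLoop =
    Maps.filterKeys(parDo.getInputsMap(), s -> payload.getTimerFamilySpecsMap().containsKey(s))
        .values()
        .contains("pc_timer");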
use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ParDoTranslation method getAdditionalOutputTags.
public static TupleTagList getAdditionalOutputTags(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> transform = application.getTransform();
  if (transform instanceof ParDo.MultiOutput) {
    return ((ParDo.MultiOutput<?, ?>) transform).getAdditionalOutputTags();
  }
  RunnerApi.PTransform protoTransform =
      PTransformTranslation.toProto(
          application, SdkComponents.create(application.getPipeline().getOptions()));
  ParDoPayload payload = ParDoPayload.parseFrom(protoTransform.getSpec().getPayload());
  TupleTag<?> mainOutputTag = getMainOutputTag(payload);
  Set<String> outputTags =
      Sets.difference(
          protoTransform.getOutputsMap().keySet(), Collections.singleton(mainOutputTag.getId()));
  ArrayList<TupleTag<?>> additionalOutputTags = new ArrayList<>();
  for (String outputTag : outputTags) {
    additionalOutputTags.add(new TupleTag<>(outputTag));
  }
  return TupleTagList.of(additionalOutputTags);
}
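For context, this is the kind of transform the method inspects. A hedged sketch of a multi-output ParDo, assuming a PCollection<String> named input exists; getAdditionalOutputTags on the resulting applied transform would report the one additional tag:

// Hypothetical multi-output ParDo: one main output tag and one additional tag.
final TupleTag<String> mainTag = new TupleTag<String>() {};
final TupleTag<String> longTag = new TupleTag<String>() {};
PCollectionTuple outputs =
    input.apply(
        ParDo.of(
                new DoFn<String, String>() {
                  @ProcessElement
                  public void process(@Element String word, MultiOutputReceiver out) {
                    // Every word goes to the main output; long words also go to longTag.
                    out.get(mainTag).output(word);
                    if (word.length() > 10) {
                      out.get(longTag).output(word);
                    }
                  }
                })
            .withOutputTags(mainTag, TupleTagList.of(longTag)));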
use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.
the class ParDoTranslation method getSideInputs.
public static List<PCollectionView<?>> getSideInputs(AppliedPTransform<?, ?, ?> application)
    throws IOException {
  PTransform<?, ?> transform = application.getTransform();
  if (transform instanceof ParDo.MultiOutput) {
    return ((ParDo.MultiOutput<?, ?>) transform)
        .getSideInputs().values().stream().collect(Collectors.toList());
  }
  SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions());
  RunnerApi.PTransform parDoProto = PTransformTranslation.toProto(application, sdkComponents);
  ParDoPayload payload = ParDoPayload.parseFrom(parDoProto.getSpec().getPayload());
  List<PCollectionView<?>> views = new ArrayList<>();
  RehydratedComponents components =
      RehydratedComponents.forComponents(sdkComponents.toComponents());
  for (Map.Entry<String, SideInput> sideInputEntry : payload.getSideInputsMap().entrySet()) {
    String sideInputTag = sideInputEntry.getKey();
    RunnerApi.SideInput sideInput = sideInputEntry.getValue();
    PCollection<?> originalPCollection =
        checkNotNull(
            (PCollection<?>) application.getInputs().get(new TupleTag<>(sideInputTag)),
            "no input with tag %s",
            sideInputTag);
    views.add(
        PCollectionViewTranslation.viewFromProto(
            sideInput, sideInputTag, originalPCollection, parDoProto, components));
  }
  return views;
}
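Again for context, a hedged sketch of the kind of ParDo this method rehydrates side inputs for, assuming PCollection<String> words and PCollection<Integer> limits exist; the names are illustrative:

// Hypothetical ParDo with a singleton side input; getSideInputs on the applied
// transform would return the corresponding PCollectionView.
final PCollectionView<Integer> maxLength = limits.apply(View.asSingleton());
words.apply(
    ParDo.of(
            new DoFn<String, String>() {
              @ProcessElement
              public void process(ProcessContext c) {
                // Read the side input for the element's window.
                if (c.element().length() <= c.sideInput(maxLength)) {
                  c.output(c.element());
                }
              }
            })
        .withSideInputs(maxLength));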