Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
The class RegisterAndProcessBundleOperation, method extractCrossBoundaryGrpcPCollectionNames.
private Set<String> extractCrossBoundaryGrpcPCollectionNames(
    final Set<Entry<String, PTransform>> ptransforms) {
  Set<String> result = new HashSet<>();
  // GRPC Read/Write transforms are expected to have only one output/input, respectively.
  for (Map.Entry<String, RunnerApi.PTransform> pTransform : ptransforms) {
    if (pTransform.getValue().getSpec().getUrn().equals(RemoteGrpcPortRead.URN)) {
      String grpcReadTransformOutputName =
          Iterables.getOnlyElement(pTransform.getValue().getOutputsMap().keySet());
      String pcollectionName =
          pTransform.getValue().getOutputsMap().get(grpcReadTransformOutputName);
      result.add(pcollectionName);
    }
    if (pTransform.getValue().getSpec().getUrn().equals(RemoteGrpcPortWrite.URN)) {
      String grpcTransformInputName =
          Iterables.getOnlyElement(pTransform.getValue().getInputsMap().keySet());
      String pcollectionName =
          pTransform.getValue().getInputsMap().get(grpcTransformInputName);
      result.add(pcollectionName);
    }
  }
  return result;
}
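For context, a minimal sketch of a call site, assuming a processBundleDescriptor variable of type BeamFnApi.ProcessBundleDescriptor (that name is illustrative, not taken from the source); the method expects the entry set of its transforms map:

// Hypothetical call site; processBundleDescriptor is an assumed
// BeamFnApi.ProcessBundleDescriptor whose transforms map keys transform ids
// to RunnerApi.PTransform messages.
Set<String> grpcPCollectionNames =
    extractCrossBoundaryGrpcPCollectionNames(
        processBundleDescriptor.getTransformsMap().entrySet());
// Each returned name identifies a PCollection that crosses the runner/SDK
// boundary through a RemoteGrpcPortRead or RemoteGrpcPortWrite transform.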
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
The class CreateExecutableStageNodeFunction, method transformCombineValuesFnToFunctionSpec.
/**
* Transforms a CombineValuesFn {@link ParDoInstruction} to an Apache Beam {@link
* RunnerApi.FunctionSpec}.
*/
private RunnerApi.FunctionSpec.Builder transformCombineValuesFnToFunctionSpec(CloudObject userFn) {
  // Grab the Combine PTransform. This transform is the composite PTransform representing the
  // entire CombinePerKey, and it contains the CombinePayload we need.
  String combinePTransformId = getString(userFn, PropertyNames.SERIALIZED_FN);
  RunnerApi.PTransform combinePerKeyPTransform =
      pipeline.getComponents().getTransformsOrDefault(combinePTransformId, null);
  checkArgument(
      combinePerKeyPTransform != null,
      "Transform with id \"%s\" not found in pipeline.",
      combinePTransformId);
  checkArgument(
      combinePerKeyPTransform.getSpec().getUrn().equals(COMBINE_PER_KEY_URN),
      "Found transform \"%s\" for Combine instruction, "
          + "but that transform had unexpected URN \"%s\" (expected \"%s\")",
      combinePerKeyPTransform,
      combinePerKeyPTransform.getSpec().getUrn(),
      COMBINE_PER_KEY_URN);
  RunnerApi.CombinePayload combinePayload;
  try {
    combinePayload =
        RunnerApi.CombinePayload.parseFrom(combinePerKeyPTransform.getSpec().getPayload());
  } catch (InvalidProtocolBufferException exc) {
    throw new RuntimeException("Combine did not have a CombinePayload", exc);
  }
  String phase = getString(userFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
  String urn;
  switch (phase) {
    case CombinePhase.ALL:
      urn = COMBINE_GROUPED_VALUES_URN;
      break;
    case CombinePhase.ADD:
      urn = COMBINE_PRECOMBINE_URN;
      break;
    case CombinePhase.MERGE:
      urn = COMBINE_MERGE_URN;
      break;
    case CombinePhase.EXTRACT:
      urn = COMBINE_EXTRACT_URN;
      break;
    default:
      throw new RuntimeException("Encountered unknown Combine Phase: " + phase);
  }
  return RunnerApi.FunctionSpec.newBuilder()
      .setUrn(urn)
      .setPayload(combinePayload.toByteString());
}
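A hedged sketch of how the returned builder might be consumed; the variable names and the unique name below are illustrative assumptions, not taken from the source:

// Hypothetical follow-up: finish the builder and attach the spec to a transform.
RunnerApi.FunctionSpec spec = transformCombineValuesFnToFunctionSpec(userFn).build();
RunnerApi.PTransform combineStep =
    RunnerApi.PTransform.newBuilder()
        .setUniqueName("CombineValues")  // illustrative name
        .setSpec(spec)
        .build();
// The URN selects which combine phase the SDK harness executes; the payload
// carries the original CombinePayload through unchanged.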
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
The class RegisterNodeFunction, method transformSideInputForSdk.
/**
* Modifies the process bundle descriptor and updates the PTransform that the SDK harness will see
* with length prefixed coders used on the side input PCollection and windowing strategy.
*/
private static final void transformSideInputForSdk(
    RunnerApi.Pipeline pipeline,
    RunnerApi.PTransform originalPTransform,
    String sideInputTag,
    ProcessBundleDescriptor.Builder processBundleDescriptor,
    RunnerApi.PTransform.Builder updatedPTransform) {
  RunnerApi.PCollection sideInputPCollection =
      pipeline.getComponents().getPcollectionsOrThrow(
          originalPTransform.getInputsOrThrow(sideInputTag));
  RunnerApi.WindowingStrategy sideInputWindowingStrategy =
      pipeline.getComponents().getWindowingStrategiesOrThrow(
          sideInputPCollection.getWindowingStrategyId());
  // TODO: We should not length prefix the window or key for the SDK side since the
  // key and window are already length delimited via protobuf itself. But we need to
  // maintain the length prefixing within the Runner harness to match the bytes that were
  // materialized to the side input sink.
  // We take the original pipeline coders and add any coders we have added when processing side
  // inputs before building new length prefixed variants.
  RunnerApi.Components.Builder componentsBuilder = pipeline.getComponents().toBuilder();
  componentsBuilder.putAllCoders(processBundleDescriptor.getCodersMap());
  String updatedSdkSideInputCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(
      sideInputPCollection.getCoderId(), componentsBuilder, false);
  String updatedSdkSideInputWindowCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(
      sideInputWindowingStrategy.getWindowCoderId(), componentsBuilder, false);
  processBundleDescriptor.putAllCoders(componentsBuilder.getCodersMap());
  String updatedSdkWindowingStrategyId = SyntheticComponents.uniqueId(
      sideInputPCollection.getWindowingStrategyId() + "-runner_generated",
      processBundleDescriptor.getWindowingStrategiesMap().keySet()::contains);
  processBundleDescriptor.putWindowingStrategies(
      updatedSdkWindowingStrategyId,
      sideInputWindowingStrategy.toBuilder()
          .setWindowCoderId(updatedSdkSideInputWindowCoderId)
          .build());
  RunnerApi.PCollection updatedSdkSideInputPcollection = sideInputPCollection.toBuilder()
      .setCoderId(updatedSdkSideInputCoderId)
      .setWindowingStrategyId(updatedSdkWindowingStrategyId)
      .build();
  // Replace the contents of the PCollection with the updated side input PCollection
  // specification and insert it into the updated PTransform.
  processBundleDescriptor.putPcollections(
      originalPTransform.getInputsOrThrow(sideInputTag), updatedSdkSideInputPcollection);
  updatedPTransform.putInputs(sideInputTag, originalPTransform.getInputsOrThrow(sideInputTag));
}
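To make the coder rewrite concrete, a minimal sketch of the LengthPrefixUnknownCoders call used twice above; "my-coder-id" is an illustrative id assumed to already exist in the components, not taken from the source:

// Hypothetical: register a length-prefixed variant of an existing coder.
RunnerApi.Components.Builder components = pipeline.getComponents().toBuilder();
String prefixedCoderId =
    LengthPrefixUnknownCoders.addLengthPrefixedCoder("my-coder-id", components, false);
// With the final argument false, unknown component coders are wrapped in a
// length prefix while keeping their original encoding (true would replace them
// with length-prefixed byte arrays); the returned id names the coder that was
// added to the components builder.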
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
The class WindowMappingFnRunnerTest, method testWindowMapping.
@Test
public void testWindowMapping() throws Exception {
  String pTransformId = "pTransformId";
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  RunnerApi.FunctionSpec functionSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(WindowMappingFnRunner.URN)
          .setPayload(ParDoTranslation.translateWindowMappingFn(
                  new GlobalWindows().getDefaultWindowMappingFn(), components)
              .toByteString())
          .build();
  RunnerApi.PTransform pTransform =
      RunnerApi.PTransform.newBuilder().setSpec(functionSpec).build();
  ThrowingFunction<KV<Object, BoundedWindow>, KV<Object, BoundedWindow>> mapFunction =
      WindowMappingFnRunner.createMapFunctionForPTransform(pTransformId, pTransform);
  KV<Object, BoundedWindow> input =
      KV.of("abc", new IntervalWindow(Instant.now(), Duration.standardMinutes(1)));
  assertEquals(KV.of(input.getKey(), GlobalWindow.INSTANCE), mapFunction.apply(input));
}
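For comparison, a sketch of the mapping the test exercises, expressed as plain SDK calls without the runner plumbing (assumes only a WindowMappingFn import beyond what the test already uses):

// The default GlobalWindows mapping sends any main-input window to the global window.
WindowMappingFn<GlobalWindow> mappingFn = new GlobalWindows().getDefaultWindowMappingFn();
BoundedWindow mapped =
    mappingFn.getSideInputWindow(new IntervalWindow(Instant.now(), Duration.standardMinutes(1)));
// mapped is GlobalWindow.INSTANCE, matching the assertEquals above.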
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
The class AssignWindowsRunnerTest, method multipleInputWindowsAsMapFnSucceeds.
@Test
public void multipleInputWindowsAsMapFnSucceeds() throws Exception {
  WindowFn<Object, BoundedWindow> windowFn = new WindowFn<Object, BoundedWindow>() {
    @Override
    public Collection<BoundedWindow> assignWindows(AssignContext c) {
      c.window();
      return ImmutableSet.of(GlobalWindow.INSTANCE,
          new IntervalWindow(new Instant(-500), Duration.standardMinutes(3)));
    }
    @Override
    public void mergeWindows(MergeContext c) {
      throw new UnsupportedOperationException();
    }
    @Override
    public WindowMappingFn<BoundedWindow> getDefaultWindowMappingFn() {
      throw new UnsupportedOperationException();
    }
    @Override
    public boolean isCompatible(WindowFn<?, ?> other) {
      throw new UnsupportedOperationException();
    }
    @Override
    public Coder<BoundedWindow> windowCoder() {
      throw new UnsupportedOperationException();
    }
  };
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));
  RunnerApi.PCollection pCollection =
      RunnerApi.PCollection.newBuilder().setUniqueName("input").setCoderId("coder-id").build();
  RunnerApi.Coder coder = CoderTranslation.toProto(VarIntCoder.of()).getCoder();
  PTransformRunnerFactoryTestContext context =
      PTransformRunnerFactoryTestContext.builder(
          "ptransform",
          PTransform.newBuilder()
              .putInputs("in", "input")
              .putOutputs("out", "output")
              .setSpec(FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                  .setPayload(WindowIntoPayload.newBuilder()
                      .setWindowFn(WindowingStrategyTranslation.toProto(windowFn, components))
                      .build()
                      .toByteString()))
              .build())
          .pCollections(Collections.singletonMap("input", pCollection))
          .coders(Collections.singletonMap("coder-id", coder))
          .build();
  Collection<WindowedValue<?>> outputs = new ArrayList<>();
  context.addPCollectionConsumer("output", outputs::add, VarIntCoder.of());
  MapFnRunners.forWindowedValueMapFnFactory(new AssignWindowsMapFnFactory<>())
      .createRunnerForPTransform(context);
  WindowedValue<Integer> value = WindowedValue.of(2, new Instant(-10L),
      ImmutableList.of(
          new IntervalWindow(new Instant(-22L), Duration.standardMinutes(5L)),
          new IntervalWindow(new Instant(-120000L), Duration.standardMinutes(3L))),
      PaneInfo.ON_TIME_AND_ONLY_FIRING);
  context.getPCollectionConsumer("input").accept(value);
  // The input value carries two windows; the WindowFn runs once per input window,
  // so two (here identical) exploded outputs are expected.
  assertThat(outputs, containsInAnyOrder(
      WindowedValue.of(2, new Instant(-10L),
          ImmutableSet.of(GlobalWindow.INSTANCE,
              new IntervalWindow(new Instant(-500), Duration.standardMinutes(3))),
          PaneInfo.ON_TIME_AND_ONLY_FIRING),
      WindowedValue.of(2, new Instant(-10L),
          ImmutableSet.of(GlobalWindow.INSTANCE,
              new IntervalWindow(new Instant(-500), Duration.standardMinutes(3))),
          PaneInfo.ON_TIME_AND_ONLY_FIRING)));
}