use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class FlinkStreamingPortablePipelineTranslator method transformSideInputs.
private TransformedSideInputs transformSideInputs(RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components, StreamingTranslationContext context) {
LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInputs = getSideInputIdToPCollectionViewMap(stagePayload, components);
Map<TupleTag<?>, Integer> tagToIntMapping = new HashMap<>();
Map<Integer, PCollectionView<?>> intToViewMapping = new HashMap<>();
List<WindowedValueCoder<KV<Void, Object>>> kvCoders = new ArrayList<>();
List<Coder<?>> viewCoders = new ArrayList<>();
int count = 0;
for (Map.Entry<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInput : sideInputs.entrySet()) {
TupleTag<?> tag = sideInput.getValue().getTagInternal();
intToViewMapping.put(count, sideInput.getValue());
tagToIntMapping.put(tag, count);
count++;
String collectionId = components.getTransformsOrThrow(sideInput.getKey().getTransformId()).getInputsOrThrow(sideInput.getKey().getLocalName());
DataStream<Object> sideInputStream = context.getDataStreamOrThrow(collectionId);
TypeInformation<Object> tpe = sideInputStream.getType();
if (!(tpe instanceof CoderTypeInformation)) {
throw new IllegalStateException("Input Stream TypeInformation is no CoderTypeInformation.");
}
WindowedValueCoder<Object> coder = (WindowedValueCoder) ((CoderTypeInformation) tpe).getCoder();
Coder<KV<Void, Object>> kvCoder = KvCoder.of(VoidCoder.of(), coder.getValueCoder());
kvCoders.add(coder.withValueCoder(kvCoder));
// coder for materialized view matching GBK below
WindowedValueCoder<KV<Void, Iterable<Object>>> viewCoder = coder.withValueCoder(KvCoder.of(VoidCoder.of(), IterableCoder.of(coder.getValueCoder())));
viewCoders.add(viewCoder);
}
// second pass, now that we gathered the input coders
UnionCoder unionCoder = UnionCoder.of(viewCoders);
CoderTypeInformation<RawUnionValue> unionTypeInformation = new CoderTypeInformation<>(unionCoder, context.getPipelineOptions());
// transform each side input to RawUnionValue and union them
DataStream<RawUnionValue> sideInputUnion = null;
for (Map.Entry<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInput : sideInputs.entrySet()) {
TupleTag<?> tag = sideInput.getValue().getTagInternal();
final int intTag = tagToIntMapping.get(tag);
RunnerApi.PTransform pTransform = components.getTransformsOrThrow(sideInput.getKey().getTransformId());
String collectionId = pTransform.getInputsOrThrow(sideInput.getKey().getLocalName());
DataStream<WindowedValue<?>> sideInputStream = context.getDataStreamOrThrow(collectionId);
// insert GBK to materialize side input view
String viewName = sideInput.getKey().getTransformId() + "-" + sideInput.getKey().getLocalName();
WindowedValueCoder<KV<Void, Object>> kvCoder = kvCoders.get(intTag);
DataStream<WindowedValue<KV<Void, Object>>> keyedSideInputStream = sideInputStream.map(new ToVoidKeyValue(context.getPipelineOptions()));
SingleOutputStreamOperator<WindowedValue<KV<Void, Iterable<Object>>>> viewStream = addGBK(keyedSideInputStream, sideInput.getValue().getWindowingStrategyInternal(), kvCoder, viewName, context);
// Assign a unique but consistent id to re-map operator state
viewStream.uid(pTransform.getUniqueName() + "-" + sideInput.getKey().getLocalName());
DataStream<RawUnionValue> unionValueStream = viewStream.map(new FlinkStreamingTransformTranslators.ToRawUnion<>(intTag, context.getPipelineOptions())).returns(unionTypeInformation);
if (sideInputUnion == null) {
sideInputUnion = unionValueStream;
} else {
sideInputUnion = sideInputUnion.union(unionValueStream);
}
}
return new TransformedSideInputs(intToViewMapping, sideInputUnion);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class FlinkStreamingPortablePipelineTranslator method translateGroupByKey.
private <K, V> void translateGroupByKey(String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
RunnerApi.PTransform pTransform = pipeline.getComponents().getTransformsOrThrow(id);
String inputPCollectionId = Iterables.getOnlyElement(pTransform.getInputsMap().values());
RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(pipeline.getComponents());
RunnerApi.WindowingStrategy windowingStrategyProto = pipeline.getComponents().getWindowingStrategiesOrThrow(pipeline.getComponents().getPcollectionsOrThrow(inputPCollectionId).getWindowingStrategyId());
WindowingStrategy<?, ?> windowingStrategy;
try {
windowingStrategy = WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
} catch (InvalidProtocolBufferException e) {
throw new IllegalStateException(String.format("Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto), e);
}
WindowedValueCoder<KV<K, V>> windowedInputCoder = (WindowedValueCoder) instantiateCoder(inputPCollectionId, pipeline.getComponents());
DataStream<WindowedValue<KV<K, V>>> inputDataStream = context.getDataStreamOrThrow(inputPCollectionId);
SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> outputDataStream = addGBK(inputDataStream, windowingStrategy, windowedInputCoder, pTransform.getUniqueName(), context);
// Assign a unique but consistent id to re-map operator state
outputDataStream.uid(pTransform.getUniqueName());
context.addDataStream(Iterables.getOnlyElement(pTransform.getOutputsMap().values()), outputDataStream);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class ProcessBundleDescriptors method addStageInput.
private static RemoteInputDestination<WindowedValue<?>> addStageInput(ApiServiceDescriptor dataEndpoint, PCollectionNode inputPCollection, Components.Builder components, WireCoderSetting wireCoderSetting) throws IOException {
String inputWireCoderId = WireCoders.addSdkWireCoder(inputPCollection, components, wireCoderSetting);
@SuppressWarnings("unchecked") Coder<WindowedValue<?>> wireCoder = (Coder) WireCoders.instantiateRunnerWireCoder(inputPCollection, components.build(), wireCoderSetting);
RemoteGrpcPort inputPort = RemoteGrpcPort.newBuilder().setApiServiceDescriptor(dataEndpoint).setCoderId(inputWireCoderId).build();
String inputId = uniqueId(String.format("fn/read/%s", inputPCollection.getId()), components::containsTransforms);
PTransform inputTransform = RemoteGrpcPortRead.readFromPort(inputPort, inputPCollection.getId()).toPTransform();
components.putTransforms(inputId, inputTransform);
return RemoteInputDestination.of(wireCoder, inputId);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class JavaClassLookupTransformProvider method getMethod.
private Method getMethod(PTransform<PInput, POutput> transform, BuilderMethod builderMethod, AllowedClass allowListClass) {
Row builderMethodRow = decodeRow(builderMethod.getSchema(), builderMethod.getPayload());
List<Method> matchingMethods = Arrays.stream(transform.getClass().getMethods()).filter(m -> isBuilderMethodForName(m, builderMethod.getName(), allowListClass)).filter(m -> parametersCompatible(m.getParameters(), builderMethodRow)).filter(m -> PTransform.class.isAssignableFrom(m.getReturnType())).collect(Collectors.toList());
if (matchingMethods.size() == 0) {
throw new RuntimeException("Could not find a matching method in transform " + transform + " for BuilderMethod" + builderMethod + ". When using field names, make sure they are available in the compiled" + " Java class.");
} else if (matchingMethods.size() > 1) {
throw new RuntimeException("Expected to find exactly one matching method in transform " + transform + " for BuilderMethod" + builderMethod + " but found " + matchingMethods.size());
}
return matchingMethods.get(0);
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class JavaClassLookupTransformProvider method applyBuilderMethods.
private PTransform<PInput, POutput> applyBuilderMethods(PTransform<PInput, POutput> transform, JavaClassLookupPayload payload, AllowedClass allowListClass) {
for (BuilderMethod builderMethod : payload.getBuilderMethodsList()) {
Method method = getMethod(transform, builderMethod, allowListClass);
try {
Row builderMethodRow = decodeRow(builderMethod.getSchema(), builderMethod.getPayload());
transform = (PTransform<PInput, POutput>) method.invoke(transform, getParameterValues(method.getParameters(), builderMethodRow, method.getGenericParameterTypes()));
} catch (IllegalAccessException | InvocationTargetException e) {
throw new IllegalArgumentException("Could not invoke the builder method " + builderMethod + " on transform " + transform + " with parameter schema " + builderMethod.getSchema(), e);
}
}
return transform;
}
Aggregations