use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.
In the class ParDoEvaluator, the method create:
public static <InputT, OutputT> ParDoEvaluator<InputT> create(
    EvaluationContext evaluationContext,
    PipelineOptions options,
    DirectStepContext stepContext,
    AppliedPTransform<?, ?, ?> application,
    Coder<InputT> inputCoder,
    WindowingStrategy<?, ? extends BoundedWindow> windowingStrategy,
    DoFn<InputT, OutputT> fn,
    StructuralKey<?> key,
    List<PCollectionView<?>> sideInputs,
    TupleTag<OutputT> mainOutputTag,
    List<TupleTag<?>> additionalOutputTags,
    Map<TupleTag<?>, PCollection<?>> outputs,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping,
    DoFnRunnerFactory<InputT, OutputT> runnerFactory) {
  BundleOutputManager outputManager = createOutputManager(evaluationContext, key, outputs);
  ReadyCheckingSideInputReader sideInputReader =
      evaluationContext.createSideInputReader(sideInputs);
  Map<TupleTag<?>, Coder<?>> outputCoders =
      outputs.entrySet().stream()
          .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue().getCoder()));
  PushbackSideInputDoFnRunner<InputT, OutputT> runner =
      runnerFactory.createRunner(
          options,
          fn,
          sideInputs,
          sideInputReader,
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          stepContext,
          inputCoder,
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);
  return create(runner, stepContext, application, outputManager);
}
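The evaluator above receives its List<PCollectionView<?>> from whatever views the user attached to the ParDo. A minimal user-level sketch (a hypothetical pipeline, not part of ParDoEvaluator itself) of how a view declared with asSingletonView() and withSideInputs(...) ends up in that sideInputs list and is served back through the side input reader:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;

public class SideInputExample {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // A singleton view; when the direct runner evaluates the ParDo below, views
    // declared like this are what arrive in the sideInputs list of create(...).
    PCollectionView<Integer> totalView =
        p.apply("Totals", Create.of(1, 2, 3))
            .apply(Sum.integersGlobally().asSingletonView());

    PCollection<Integer> scaled =
        p.apply("Values", Create.of(10, 20, 30))
            .apply(
                "ScaleByTotal",
                ParDo.of(
                        new DoFn<Integer, Integer>() {
                          @ProcessElement
                          public void process(ProcessContext c) {
                            // Reads go through the SideInputReader the evaluator was built with.
                            int total = c.sideInput(totalView);
                            c.output(c.element() * total);
                          }
                        })
                    .withSideInputs(totalView));

    p.run().waitUntilFinish();
  }
}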
use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.
In the class FlinkStreamingPortablePipelineTranslator, the method getSideInputIdToPCollectionViewMap:
private static LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>
    getSideInputIdToPCollectionViewMap(
        RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components) {
  RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components);
  LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInputs =
      new LinkedHashMap<>();
  // for PCollectionView compatibility, not used to transform materialization
  ViewFn<Iterable<WindowedValue<?>>, ?> viewFn =
      (ViewFn)
          new PCollectionViews.MultimapViewFn<>(
              (PCollectionViews.TypeDescriptorSupplier<Iterable<WindowedValue<Void>>>)
                  () -> TypeDescriptors.iterables(new TypeDescriptor<WindowedValue<Void>>() {}),
              (PCollectionViews.TypeDescriptorSupplier<Void>) TypeDescriptors::voids);
  for (RunnerApi.ExecutableStagePayload.SideInputId sideInputId :
      stagePayload.getSideInputsList()) {
    // TODO: local name is unique as long as only one transform with side input can be within a
    // stage
    String sideInputTag = sideInputId.getLocalName();
    String collectionId =
        components
            .getTransformsOrThrow(sideInputId.getTransformId())
            .getInputsOrThrow(sideInputId.getLocalName());
    RunnerApi.WindowingStrategy windowingStrategyProto =
        components.getWindowingStrategiesOrThrow(
            components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId());
    final WindowingStrategy<?, ?> windowingStrategy;
    try {
      windowingStrategy =
          WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
    } catch (InvalidProtocolBufferException e) {
      throw new IllegalStateException(
          String.format(
              "Unable to hydrate side input windowing strategy %s.", windowingStrategyProto),
          e);
    }
    Coder<WindowedValue<Object>> coder = instantiateCoder(collectionId, components);
    // side input materialization via GBK (T -> Iterable<T>)
    WindowedValueCoder wvCoder = (WindowedValueCoder) coder;
    coder = wvCoder.withValueCoder(IterableCoder.of(wvCoder.getValueCoder()));
    sideInputs.put(
        sideInputId,
        new RunnerPCollectionView<>(
            null,
            new TupleTag<>(sideInputTag),
            viewFn,
            // TODO: support custom mapping fn
            windowingStrategy.getWindowFn().getDefaultWindowMappingFn(),
            windowingStrategy,
            coder));
  }
  return sideInputs;
}
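The one non-obvious step in the loop above is the coder adjustment made just before the RunnerPCollectionView is constructed. A standalone sketch of only that step, with StringUtf8Coder and GlobalWindow.Coder standing in as assumptions for whatever instantiateCoder(collectionId, components) actually returns:

import org.apache.beam.sdk.coders.IterableCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder;

public class SideInputCoderSketch {
  public static void main(String[] args) {
    // Stand-in for the coder of the side-input PCollection (assumed element and
    // window coders; the real ones come from the pipeline proto).
    WindowedValueCoder<String> elementCoder =
        WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);

    // Same adjustment as in the loop: wrap the value coder in IterableCoder,
    // because the GBK-based materialization turns each window's contents into Iterable<T>.
    WindowedValueCoder<Iterable<String>> materializedCoder =
        elementCoder.withValueCoder(IterableCoder.of(elementCoder.getValueCoder()));

    System.out.println(materializedCoder);
  }
}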
use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.
In the class FlinkStreamingPortablePipelineTranslator, the method transformSideInputs:
private TransformedSideInputs transformSideInputs(
    RunnerApi.ExecutableStagePayload stagePayload,
    RunnerApi.Components components,
    StreamingTranslationContext context) {
  LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInputs =
      getSideInputIdToPCollectionViewMap(stagePayload, components);
  Map<TupleTag<?>, Integer> tagToIntMapping = new HashMap<>();
  Map<Integer, PCollectionView<?>> intToViewMapping = new HashMap<>();
  List<WindowedValueCoder<KV<Void, Object>>> kvCoders = new ArrayList<>();
  List<Coder<?>> viewCoders = new ArrayList<>();
  int count = 0;
  for (Map.Entry<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInput :
      sideInputs.entrySet()) {
    TupleTag<?> tag = sideInput.getValue().getTagInternal();
    intToViewMapping.put(count, sideInput.getValue());
    tagToIntMapping.put(tag, count);
    count++;
    String collectionId =
        components
            .getTransformsOrThrow(sideInput.getKey().getTransformId())
            .getInputsOrThrow(sideInput.getKey().getLocalName());
    DataStream<Object> sideInputStream = context.getDataStreamOrThrow(collectionId);
    TypeInformation<Object> tpe = sideInputStream.getType();
    if (!(tpe instanceof CoderTypeInformation)) {
      throw new IllegalStateException("Input Stream TypeInformation is no CoderTypeInformation.");
    }
    WindowedValueCoder<Object> coder =
        (WindowedValueCoder) ((CoderTypeInformation) tpe).getCoder();
    Coder<KV<Void, Object>> kvCoder = KvCoder.of(VoidCoder.of(), coder.getValueCoder());
    kvCoders.add(coder.withValueCoder(kvCoder));
    // coder for materialized view matching GBK below
    WindowedValueCoder<KV<Void, Iterable<Object>>> viewCoder =
        coder.withValueCoder(
            KvCoder.of(VoidCoder.of(), IterableCoder.of(coder.getValueCoder())));
    viewCoders.add(viewCoder);
  }
  // second pass, now that we gathered the input coders
  UnionCoder unionCoder = UnionCoder.of(viewCoders);
  CoderTypeInformation<RawUnionValue> unionTypeInformation =
      new CoderTypeInformation<>(unionCoder, context.getPipelineOptions());
  // transform each side input to RawUnionValue and union them
  DataStream<RawUnionValue> sideInputUnion = null;
  for (Map.Entry<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInput :
      sideInputs.entrySet()) {
    TupleTag<?> tag = sideInput.getValue().getTagInternal();
    final int intTag = tagToIntMapping.get(tag);
    RunnerApi.PTransform pTransform =
        components.getTransformsOrThrow(sideInput.getKey().getTransformId());
    String collectionId = pTransform.getInputsOrThrow(sideInput.getKey().getLocalName());
    DataStream<WindowedValue<?>> sideInputStream = context.getDataStreamOrThrow(collectionId);
    // insert GBK to materialize side input view
    String viewName =
        sideInput.getKey().getTransformId() + "-" + sideInput.getKey().getLocalName();
    WindowedValueCoder<KV<Void, Object>> kvCoder = kvCoders.get(intTag);
    DataStream<WindowedValue<KV<Void, Object>>> keyedSideInputStream =
        sideInputStream.map(new ToVoidKeyValue(context.getPipelineOptions()));
    SingleOutputStreamOperator<WindowedValue<KV<Void, Iterable<Object>>>> viewStream =
        addGBK(
            keyedSideInputStream,
            sideInput.getValue().getWindowingStrategyInternal(),
            kvCoder,
            viewName,
            context);
    // Assign a unique but consistent id to re-map operator state
    viewStream.uid(pTransform.getUniqueName() + "-" + sideInput.getKey().getLocalName());
    DataStream<RawUnionValue> unionValueStream =
        viewStream
            .map(
                new FlinkStreamingTransformTranslators.ToRawUnion<>(
                    intTag, context.getPipelineOptions()))
            .returns(unionTypeInformation);
    if (sideInputUnion == null) {
      sideInputUnion = unionValueStream;
    } else {
      sideInputUnion = sideInputUnion.union(unionValueStream);
    }
  }
  return new TransformedSideInputs(intToViewMapping, sideInputUnion);
}
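Each materialized view is tagged with an integer, wrapped as a RawUnionValue, and folded into a single union stream; intToViewMapping is what lets a consumer turn a union tag back into the corresponding PCollectionView. A hedged sketch of that reverse lookup (the helper name and its caller are illustrative, not part of the translator):

import java.util.Map;
import org.apache.beam.sdk.transforms.join.RawUnionValue;
import org.apache.beam.sdk.values.PCollectionView;

public class SideInputDemux {
  /**
   * Resolves a RawUnionValue back to the PCollectionView it was tagged for, using the
   * Integer-to-view mapping returned in TransformedSideInputs above.
   */
  static PCollectionView<?> viewFor(
      Map<Integer, PCollectionView<?>> intToViewMapping, RawUnionValue unionValue) {
    PCollectionView<?> view = intToViewMapping.get(unionValue.getUnionTag());
    if (view == null) {
      throw new IllegalStateException("Unknown union tag " + unionValue.getUnionTag());
    }
    return view;
  }
}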
use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.
In the class BeamFnMapTaskExecutorFactory, the method createOperationTransformForRegisterFnNodes:
private Function<Node, Node> createOperationTransformForRegisterFnNodes(
    final IdGenerator idGenerator,
    final InstructionRequestHandler instructionRequestHandler,
    final StateDelegator beamFnStateDelegator,
    final String stageName,
    final DataflowExecutionContext<?> executionContext) {
  return new TypeSafeNodeFunction<RegisterRequestNode>(RegisterRequestNode.class) {

    @Override
    public Node typedApply(RegisterRequestNode input) {
      ImmutableMap.Builder<String, DataflowOperationContext>
          ptransformIdToOperationContextBuilder = ImmutableMap.builder();
      ImmutableMap.Builder<String, DataflowStepContext> ptransformIdToStepContext =
          ImmutableMap.builder();
      for (Map.Entry<String, NameContext> entry :
          input.getPTransformIdToPartialNameContextMap().entrySet()) {
        NameContext fullNameContext =
            NameContext.create(
                stageName,
                entry.getValue().originalName(),
                entry.getValue().systemName(),
                entry.getValue().userName());
        DataflowOperationContext operationContext =
            executionContext.createOperationContext(fullNameContext);
        ptransformIdToOperationContextBuilder.put(entry.getKey(), operationContext);
        ptransformIdToStepContext.put(
            entry.getKey(), executionContext.getStepContext(operationContext));
      }
      ImmutableMap.Builder<String, NameContext> pcollectionIdToNameContext =
          ImmutableMap.builder();
      for (Map.Entry<String, NameContext> entry :
          input.getPCollectionToPartialNameContextMap().entrySet()) {
        pcollectionIdToNameContext.put(
            entry.getKey(),
            NameContext.create(
                stageName,
                entry.getValue().originalName(),
                entry.getValue().systemName(),
                entry.getValue().userName()));
      }
      ImmutableMap<String, DataflowOperationContext> ptransformIdToOperationContexts =
          ptransformIdToOperationContextBuilder.build();
      ImmutableMap<String, SideInputReader> ptransformIdToSideInputReaders =
          buildPTransformIdToSideInputReadersMap(
              executionContext, input, ptransformIdToOperationContexts);
      ImmutableTable<String, String, PCollectionView<?>>
          ptransformIdToSideInputIdToPCollectionView =
              buildPTransformIdToSideInputIdToPCollectionView(input);
      return OperationNode.create(
          new RegisterAndProcessBundleOperation(
              idGenerator,
              instructionRequestHandler,
              beamFnStateDelegator,
              input.getRegisterRequest(),
              ptransformIdToOperationContexts,
              ptransformIdToStepContext.build(),
              ptransformIdToSideInputReaders,
              ptransformIdToSideInputIdToPCollectionView,
              pcollectionIdToNameContext.build(),
              // TODO: Set NameContext properly for these operations.
              executionContext.createOperationContext(
                  NameContext.create(stageName, stageName, stageName, stageName))));
    }
  };
}
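The ImmutableTable produced by buildPTransformIdToSideInputIdToPCollectionView(input) is keyed by PTransform id (row) and side-input local name (column). A small illustration of that shape using plain Guava (Beam's own worker code uses a vendored copy; the ids below are made up and the view comes from a throwaway pipeline):

import com.google.common.collect.ImmutableTable;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.values.PCollectionView;

public class SideInputTableSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    PCollectionView<Iterable<String>> someView =
        p.apply(Create.of("a", "b")).apply(View.asIterable());

    // Rows are PTransform ids, columns are side-input local names, values are views.
    ImmutableTable<String, String, PCollectionView<?>> byTransformAndSideInput =
        ImmutableTable.<String, String, PCollectionView<?>>builder()
            .put("ptransform-1", "side0", someView)
            .build();

    PCollectionView<?> resolved = byTransformAndSideInput.get("ptransform-1", "side0");
    System.out.println(resolved == someView);
  }
}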
use of org.apache.beam.sdk.values.PCollectionView in project beam by apache.
In the class CombineValuesFnFactory, the method create:
@Override
public ParDoFn create(
    PipelineOptions options,
    CloudObject cloudUserFn,
    @Nullable List<SideInputInfo> sideInputInfos,
    TupleTag<?> mainOutputTag,
    Map<TupleTag<?>, Integer> outputTupleTagsToReceiverIndices,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  Preconditions.checkArgument(
      outputTupleTagsToReceiverIndices.size() == 1,
      "expected exactly one output for CombineValuesFn");
  Object deserializedFn =
      SerializableUtils.deserializeFromByteArray(
          getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN), "serialized user fn");
  Preconditions.checkArgument(deserializedFn instanceof AppliedCombineFn);
  AppliedCombineFn<?, ?, ?, ?> combineFn = (AppliedCombineFn<?, ?, ?, ?>) deserializedFn;
  Iterable<PCollectionView<?>> sideInputViews = combineFn.getSideInputViews();
  SideInputReader sideInputReader =
      executionContext.getSideInputReader(sideInputInfos, sideInputViews, operationContext);
  // Get the combine phase, default to ALL. (The implementation
  // doesn't have to split the combiner.)
  String phase = getString(cloudUserFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
  DoFnInfo<?, ?> doFnInfo = getDoFnInfo(combineFn, sideInputReader, phase);
  return new SimpleParDoFn(
      options,
      DoFnInstanceManagers.singleInstance(doFnInfo),
      sideInputReader,
      mainOutputTag,
      outputTupleTagsToReceiverIndices,
      executionContext.getStepContext(operationContext),
      operationContext,
      doFnInfo.getDoFnSchemaInformation(),
      doFnInfo.getSideInputMapping(),
      SimpleDoFnRunnerFactory.INSTANCE);
}
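The PCollectionViews the factory pulls out of the AppliedCombineFn originate from a combine that was declared with side inputs. A user-level sketch (a hypothetical pipeline, not the worker code) of a CombineFnWithContext reading a view through its Context, which at execution time is backed by a SideInputReader like the one obtained above:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.CombineWithContext;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.View;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;

public class CombineWithSideInputExample {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

    // Views attached here are what getSideInputViews() later reports on the worker.
    PCollectionView<Integer> offsetView =
        p.apply("Offset", Create.of(5)).apply(View.asSingleton());

    PCollection<Integer> summed =
        p.apply("Values", Create.of(1, 2, 3))
            .apply(
                "SumPlusOffset",
                Combine.globally(
                        new CombineWithContext.CombineFnWithContext<Integer, Integer, Integer>() {
                          @Override
                          public Integer createAccumulator(CombineWithContext.Context c) {
                            return 0;
                          }

                          @Override
                          public Integer addInput(
                              Integer acc, Integer input, CombineWithContext.Context c) {
                            return acc + input;
                          }

                          @Override
                          public Integer mergeAccumulators(
                              Iterable<Integer> accs, CombineWithContext.Context c) {
                            int sum = 0;
                            for (int a : accs) {
                              sum += a;
                            }
                            return sum;
                          }

                          @Override
                          public Integer extractOutput(
                              Integer acc, CombineWithContext.Context c) {
                            // Side inputs are read through the Context.
                            return acc + c.sideInput(offsetView);
                          }
                        })
                    .withSideInputs(offsetView));

    p.run().waitUntilFinish();
  }
}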