Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by Apache.
From the class ProtoOverridesTest, method replacesOnlyMatching:
@Test
public void replacesOnlyMatching() {
  RunnerApi.Pipeline p =
      Pipeline.newBuilder()
          .addAllRootTransformIds(ImmutableList.of("first", "second"))
          .setComponents(
              Components.newBuilder()
                  .putTransforms(
                      "first",
                      PTransform.newBuilder()
                          .setSpec(FunctionSpec.newBuilder().setUrn("beam:first"))
                          .build())
                  .putTransforms(
                      "second",
                      PTransform.newBuilder()
                          .setSpec(FunctionSpec.newBuilder().setUrn("beam:second"))
                          .build())
                  .putPcollections(
                      "intermediatePc",
                      PCollection.newBuilder().setUniqueName("intermediate").build())
                  .putCoders(
                      "coder",
                      Coder.newBuilder().setSpec(FunctionSpec.getDefaultInstance()).build()))
          .build();
  PTransform secondReplacement =
      PTransform.newBuilder()
          .addSubtransforms("second_sub")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn("beam:second:replacement")
                  .setPayload(
                      ByteString.copyFrom("foo-bar-baz".getBytes(StandardCharsets.UTF_8))))
          .build();
  WindowingStrategy introducedWS =
      WindowingStrategy.newBuilder()
          .setAccumulationMode(AccumulationMode.Enum.ACCUMULATING)
          .build();
  RunnerApi.Components extraComponents =
      Components.newBuilder()
          .putPcollections(
              "intermediatePc",
              PCollection.newBuilder().setUniqueName("intermediate_replacement").build())
          .putWindowingStrategies("new_ws", introducedWS)
          .putTransforms("second_sub", PTransform.getDefaultInstance())
          .build();
  Pipeline updated =
      ProtoOverrides.updateTransform(
          "beam:second", p, new TestReplacer(secondReplacement, extraComponents));
  PTransform updatedSecond = updated.getComponents().getTransformsOrThrow("second");
  assertThat(updatedSecond, equalTo(secondReplacement));
  assertThat(
      updated.getComponents().getWindowingStrategiesOrThrow("new_ws"), equalTo(introducedWS));
  assertThat(
      updated.getComponents().getTransformsOrThrow("second_sub"),
      equalTo(PTransform.getDefaultInstance()));
  // TODO: This might not be appropriate. Merging in the other direction might force callers
  // to be well behaved.
  assertThat(
      updated.getComponents().getPcollectionsOrThrow("intermediatePc").getUniqueName(),
      equalTo("intermediate_replacement"));
  // Assert that the untouched components are unchanged.
  assertThat(
      updated.getComponents().getTransformsOrThrow("first"),
      equalTo(p.getComponents().getTransformsOrThrow("first")));
  assertThat(
      updated.getComponents().getCodersOrThrow("coder"),
      equalTo(p.getComponents().getCodersOrThrow("coder")));
  assertThat(updated.getRootTransformIdsList(), equalTo(p.getRootTransformIdsList()));
}
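The TestReplacer helper referenced above is not part of this excerpt. A minimal sketch, assuming it implements ProtoOverrides.TransformReplacement and simply returns the canned replacement transform together with the extra components (the MessageWithComponents shape is an assumption based on the RunnerApi protos), might look like this:

private static class TestReplacer implements ProtoOverrides.TransformReplacement {
  private final PTransform extraTransform;
  private final Components extraComponents;

  private TestReplacer(PTransform extraTransform, Components extraComponents) {
    this.extraTransform = extraTransform;
    this.extraComponents = extraComponents;
  }

  @Override
  public MessageWithComponents getReplacement(
      String transformId, ComponentsOrBuilder existingComponents) {
    // Ignore the existing components and return the pre-built replacement;
    // updateTransform is expected to merge extraComponents into the pipeline.
    return MessageWithComponents.newBuilder()
        .setPtransform(extraTransform)
        .setComponents(extraComponents)
        .build();
  }
}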
Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by Apache.
From the class FlinkBatchPortablePipelineTranslator, method translateGroupByKey:
private static <K, V> void translateGroupByKey(
    PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
  RunnerApi.Components components = pipeline.getComponents();
  String inputPCollectionId =
      Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
  PCollectionNode inputCollection =
      PipelineNode.pCollection(
          inputPCollectionId, components.getPcollectionsOrThrow(inputPCollectionId));
  DataSet<WindowedValue<KV<K, V>>> inputDataSet = context.getDataSetOrThrow(inputPCollectionId);
  RunnerApi.WindowingStrategy windowingStrategyProto =
      pipeline
          .getComponents()
          .getWindowingStrategiesOrThrow(
              pipeline
                  .getComponents()
                  .getPcollectionsOrThrow(inputPCollectionId)
                  .getWindowingStrategyId());
  RehydratedComponents rehydratedComponents =
      RehydratedComponents.forComponents(pipeline.getComponents());
  WindowingStrategy<Object, BoundedWindow> windowingStrategy;
  try {
    windowingStrategy =
        (WindowingStrategy<Object, BoundedWindow>)
            WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalStateException(
        String.format(
            "Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto),
        e);
  }
  WindowedValueCoder<KV<K, V>> inputCoder;
  try {
    inputCoder =
        (WindowedValueCoder)
            WireCoders.instantiateRunnerWireCoder(inputCollection, pipeline.getComponents());
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  KvCoder<K, V> inputElementCoder = (KvCoder<K, V>) inputCoder.getValueCoder();
  Concatenate<V> combineFn = new Concatenate<>();
  Coder<List<V>> accumulatorCoder =
      combineFn.getAccumulatorCoder(
          CoderRegistry.createDefault(), inputElementCoder.getValueCoder());
  Coder<WindowedValue<KV<K, List<V>>>> outputCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(inputElementCoder.getKeyCoder(), accumulatorCoder),
          windowingStrategy.getWindowFn().windowCoder());
  TypeInformation<WindowedValue<KV<K, List<V>>>> partialReduceTypeInfo =
      new CoderTypeInformation<>(outputCoder, context.getPipelineOptions());
  Grouping<WindowedValue<KV<K, V>>> inputGrouping =
      inputDataSet.groupBy(new KvKeySelector<>(inputElementCoder.getKeyCoder()));
  FlinkPartialReduceFunction<K, V, List<V>, ?> partialReduceFunction =
      new FlinkPartialReduceFunction<>(
          combineFn, windowingStrategy, Collections.emptyMap(), context.getPipelineOptions());
  FlinkReduceFunction<K, List<V>, List<V>, ?> reduceFunction =
      new FlinkReduceFunction<>(
          combineFn, windowingStrategy, Collections.emptyMap(), context.getPipelineOptions());
  // Partially GroupReduce the values into the intermediate format AccumT (combine)
  GroupCombineOperator<WindowedValue<KV<K, V>>, WindowedValue<KV<K, List<V>>>> groupCombine =
      new GroupCombineOperator<>(
          inputGrouping,
          partialReduceTypeInfo,
          partialReduceFunction,
          "GroupCombine: " + transform.getTransform().getUniqueName());
  Grouping<WindowedValue<KV<K, List<V>>>> intermediateGrouping =
      groupCombine.groupBy(new KvKeySelector<>(inputElementCoder.getKeyCoder()));
  // Fully reduce the values and create output format VO
  GroupReduceOperator<WindowedValue<KV<K, List<V>>>, WindowedValue<KV<K, List<V>>>>
      outputDataSet =
          new GroupReduceOperator<>(
              intermediateGrouping,
              partialReduceTypeInfo,
              reduceFunction,
              transform.getTransform().getUniqueName());
  context.addDataSet(
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()),
      outputDataSet);
}
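The Concatenate combine function used above buffers all values for a key into a List, which is the intermediate AccumT format mentioned in the comment. A minimal sketch of such a CombineFn, assuming the standard runners-core shape:

// A CombineFn that collects every input element for a key into a single List.
private static class Concatenate<T> extends Combine.CombineFn<T, List<T>, List<T>> {
  @Override
  public List<T> createAccumulator() {
    return new ArrayList<>();
  }

  @Override
  public List<T> addInput(List<T> accumulator, T input) {
    accumulator.add(input);
    return accumulator;
  }

  @Override
  public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
    List<T> result = createAccumulator();
    for (List<T> accumulator : accumulators) {
      result.addAll(accumulator);
    }
    return result;
  }

  @Override
  public List<T> extractOutput(List<T> accumulator) {
    return accumulator;
  }

  @Override
  public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
    // Declared without a checked exception, which is why the caller above
    // needs no try/catch around getAccumulatorCoder.
    return ListCoder.of(inputCoder);
  }
}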
Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by Apache.
From the class FlinkStreamingPortablePipelineTranslator, method getSideInputIdToPCollectionViewMap:
private static LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>
    getSideInputIdToPCollectionViewMap(
        RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components) {
  RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components);
  LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInputs =
      new LinkedHashMap<>();
  // for PCollectionView compatibility, not used to transform materialization
  ViewFn<Iterable<WindowedValue<?>>, ?> viewFn =
      (ViewFn)
          new PCollectionViews.MultimapViewFn<>(
              (PCollectionViews.TypeDescriptorSupplier<Iterable<WindowedValue<Void>>>)
                  () -> TypeDescriptors.iterables(new TypeDescriptor<WindowedValue<Void>>() {}),
              (PCollectionViews.TypeDescriptorSupplier<Void>) TypeDescriptors::voids);
  for (RunnerApi.ExecutableStagePayload.SideInputId sideInputId :
      stagePayload.getSideInputsList()) {
    // TODO: local name is unique as long as only one transform with side input can be within a
    // stage
    String sideInputTag = sideInputId.getLocalName();
    String collectionId =
        components
            .getTransformsOrThrow(sideInputId.getTransformId())
            .getInputsOrThrow(sideInputId.getLocalName());
    RunnerApi.WindowingStrategy windowingStrategyProto =
        components.getWindowingStrategiesOrThrow(
            components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId());
    final WindowingStrategy<?, ?> windowingStrategy;
    try {
      windowingStrategy =
          WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
    } catch (InvalidProtocolBufferException e) {
      throw new IllegalStateException(
          String.format(
              "Unable to hydrate side input windowing strategy %s.", windowingStrategyProto),
          e);
    }
    Coder<WindowedValue<Object>> coder = instantiateCoder(collectionId, components);
    // side input materialization via GBK (T -> Iterable<T>)
    WindowedValueCoder wvCoder = (WindowedValueCoder) coder;
    coder = wvCoder.withValueCoder(IterableCoder.of(wvCoder.getValueCoder()));
    sideInputs.put(
        sideInputId,
        new RunnerPCollectionView<>(
            null,
            new TupleTag<>(sideInputTag),
            viewFn,
            // TODO: support custom mapping fn
            windowingStrategy.getWindowFn().getDefaultWindowMappingFn(),
            windowingStrategy,
            coder));
  }
  return sideInputs;
}
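The instantiateCoder call resolves the runner-side wire coder for a PCollection. A plausible sketch, assuming it delegates to WireCoders.instantiateRunnerWireCoder in the same way the batch translator above does:

static <T> Coder<WindowedValue<T>> instantiateCoder(
    String collectionId, RunnerApi.Components components) {
  PipelineNode.PCollectionNode collectionNode =
      PipelineNode.pCollection(collectionId, components.getPcollectionsOrThrow(collectionId));
  try {
    // Resolve the coder the runner uses on the wire for this collection.
    return (Coder) WireCoders.instantiateRunnerWireCoder(collectionNode, components);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}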
Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by Apache.
From the class FlinkStreamingPortablePipelineTranslator, method translateGroupByKey:
private <K, V> void translateGroupByKey(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform pTransform = pipeline.getComponents().getTransformsOrThrow(id);
  String inputPCollectionId = Iterables.getOnlyElement(pTransform.getInputsMap().values());
  RehydratedComponents rehydratedComponents =
      RehydratedComponents.forComponents(pipeline.getComponents());
  RunnerApi.WindowingStrategy windowingStrategyProto =
      pipeline
          .getComponents()
          .getWindowingStrategiesOrThrow(
              pipeline
                  .getComponents()
                  .getPcollectionsOrThrow(inputPCollectionId)
                  .getWindowingStrategyId());
  WindowingStrategy<?, ?> windowingStrategy;
  try {
    windowingStrategy =
        WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalStateException(
        String.format(
            "Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto),
        e);
  }
  WindowedValueCoder<KV<K, V>> windowedInputCoder =
      (WindowedValueCoder) instantiateCoder(inputPCollectionId, pipeline.getComponents());
  DataStream<WindowedValue<KV<K, V>>> inputDataStream =
      context.getDataStreamOrThrow(inputPCollectionId);
  SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> outputDataStream =
      addGBK(
          inputDataStream,
          windowingStrategy,
          windowedInputCoder,
          pTransform.getUniqueName(),
          context);
  // Assign a unique but consistent id to re-map operator state
  outputDataStream.uid(pTransform.getUniqueName());
  context.addDataStream(
      Iterables.getOnlyElement(pTransform.getOutputsMap().values()), outputDataStream);
}
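Both this method and the batch translator above repeat the same two-step lookup from a PCollection id to its windowing strategy proto. A hypothetical helper, named here purely for illustration and not part of the Beam codebase, would capture the pattern as:

// Hypothetical helper: PCollection id -> windowing strategy id -> WindowingStrategy proto.
private static RunnerApi.WindowingStrategy windowingStrategyFor(
    String pCollectionId, RunnerApi.Components components) {
  String strategyId =
      components.getPcollectionsOrThrow(pCollectionId).getWindowingStrategyId();
  return components.getWindowingStrategiesOrThrow(strategyId);
}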
Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by Apache.
From the class ExecutableStageDoFnOperatorTest, method getOperator:
@SuppressWarnings("rawtypes")
private ExecutableStageDoFnOperator getOperator(
    TupleTag<Integer> mainOutput,
    List<TupleTag<?>> additionalOutputs,
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory,
    WindowingStrategy windowingStrategy,
    @Nullable Coder keyCoder,
    Coder windowedInputCoder) {
  FlinkExecutableStageContextFactory contextFactory =
      Mockito.mock(FlinkExecutableStageContextFactory.class);
  when(contextFactory.get(any())).thenReturn(stageContext);

  final ExecutableStagePayload stagePayload;
  if (keyCoder != null) {
    stagePayload = this.stagePayloadWithUserState;
  } else {
    stagePayload = this.stagePayload;
  }

  ExecutableStageDoFnOperator<Integer, Integer> operator =
      new ExecutableStageDoFnOperator<>(
          "transform",
          windowedInputCoder,
          Collections.emptyMap(),
          mainOutput,
          additionalOutputs,
          outputManagerFactory,
          Collections.emptyMap() /* sideInputTagMapping */,
          Collections.emptyList() /* sideInputs */,
          Collections.emptyMap() /* sideInputId mapping */,
          FlinkPipelineOptions.defaults(),
          stagePayload,
          jobInfo,
          contextFactory,
          createOutputMap(mainOutput, additionalOutputs),
          windowingStrategy,
          keyCoder,
          keyCoder != null ? new KvToByteBufferKeySelector<>(keyCoder, null) : null);

  Whitebox.setInternalState(operator, "stateRequestHandler", stateRequestHandler);
  return operator;
}
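A test might obtain a stateless operator from this helper roughly as follows. This is only a sketch: the output-manager factory is mocked here for brevity rather than constructed the way the real test class would, and the coders are arbitrary choices for illustration.

TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
    Mockito.mock(DoFnOperator.MultiOutputOutputManagerFactory.class);
ExecutableStageDoFnOperator operator =
    getOperator(
        mainOutput,
        Collections.emptyList(),
        outputManagerFactory,
        WindowingStrategy.globalDefault(),
        null, // no key coder: exercises the stateless stagePayload branch
        WindowedValue.getValueOnlyCoder(VarIntCoder.of()));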