use of org.apache.beam.sdk.transforms.ViewFn in project beam by apache.
the class SparkSideInputReader method initializeBroadcastVariable.
/**
 * Builds one materialized side-input value per window from the given windowed elements.
 *
 * <p>Elements are first grouped under every window they report via {@code getWindows()}; the
 * view's {@link ViewFn} is then applied to each window's unwrapped values.
 *
 * @param inputValues the windowed elements backing the side input
 * @param view the view whose {@link ViewFn} and materialization URN drive the conversion
 * @return a map from each window seen in {@code inputValues} to the value produced by the view fn
 * @throws IllegalStateException if at least one window is present and the view's materialization
 *     URN is neither the iterable nor the multimap URN
 */
private <T> Map<BoundedWindow, T> initializeBroadcastVariable(Iterable<WindowedValue<?>> inputValues, PCollectionView<T> view) {
  // Step 1: bucket each element under every window it belongs to.
  Map<BoundedWindow, List<WindowedValue<?>>> elementsByWindow = new HashMap<>();
  for (WindowedValue<?> element : inputValues) {
    for (BoundedWindow window : element.getWindows()) {
      elementsByWindow.computeIfAbsent(window, unused -> new ArrayList<>()).add(element);
    }
  }

  // Step 2: turn each bucket into the view's result type.
  Map<BoundedWindow, T> viewsByWindow = new HashMap<>();
  for (Map.Entry<BoundedWindow, List<WindowedValue<?>>> bucket : elementsByWindow.entrySet()) {
    String urn = view.getViewFn().getMaterialization().getUrn();
    switch (urn) {
      case Materializations.ITERABLE_MATERIALIZATION_URN:
        {
          ViewFn<IterableView, T> iterableFn = (ViewFn<IterableView, T>) view.getViewFn();
          // The supplier re-collects the unwrapped values each time the view is read.
          T materialized =
              iterableFn.apply(
                  () ->
                      bucket.getValue().stream()
                          .map(WindowedValue::getValue)
                          .collect(Collectors.toList()));
          viewsByWindow.put(bucket.getKey(), materialized);
          break;
        }
      case Materializations.MULTIMAP_MATERIALIZATION_URN:
        {
          ViewFn<MultimapView, T> multimapFn = (ViewFn<MultimapView, T>) view.getViewFn();
          Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
          T materialized =
              multimapFn.apply(
                  InMemoryMultimapSideInputView.fromIterable(
                      keyCoder,
                      (Iterable)
                          bucket.getValue().stream()
                              .map(WindowedValue::getValue)
                              .collect(Collectors.toList())));
          viewsByWindow.put(bucket.getKey(), materialized);
          break;
        }
      default:
        throw new IllegalStateException(String.format("Unknown side input materialization format requested '%s'", urn));
    }
  }
  return viewsByWindow;
}
use of org.apache.beam.sdk.transforms.ViewFn in project beam by apache.
the class SparkSideInputReader method get.
/**
 * Returns the side-input value of {@code view} for the side-input window that {@code window}
 * maps to.
 *
 * <p>The broadcast registered under the view's tag holds elements for all windows; this method
 * keeps only those whose windows contain the mapped side-input window, unwraps them, and hands
 * them to the view's {@link ViewFn} according to its materialization URN.
 *
 * @param view the side-input view to read; must not be null
 * @param window the main-input window, translated through the view's window mapping fn
 * @return the value produced by the view's {@link ViewFn} for the matching elements
 * @throws NullPointerException if {@code view} is null or no broadcast is registered for its tag
 * @throws IllegalStateException if the view's materialization URN is neither the iterable nor the
 *     multimap URN
 */
@Override
@Nullable
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
  // --- validate sideInput.
  checkNotNull(view, "The PCollectionView passed to sideInput cannot be null ");
  KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>> windowedBroadcastHelper = sideInputs.get(view.getTagInternal());
  checkNotNull(windowedBroadcastHelper, "SideInput for view " + view + " is not available.");

  // --- sideInput window
  final BoundedWindow sideInputWindow = view.getWindowMappingFn().getSideInputWindow(window);

  // --- match the appropriate sideInput window.
  // A tag points to the side inputs of all windows, so filter down to the mapped window.
  Iterable<WindowedValue<?>> availableSideInputs = (Iterable<WindowedValue<?>>) windowedBroadcastHelper.getValue().getValue();
  // Filter and unwrap in a single pass; no intermediate list is needed between the two steps.
  Iterable<?> sideInputForWindow =
      StreamSupport.stream(availableSideInputs.spliterator(), false)
          .filter(
              sideInputCandidate ->
                  sideInputCandidate != null
                      && Iterables.contains(sideInputCandidate.getWindows(), sideInputWindow))
          .map(WindowedValue::getValue)
          .collect(Collectors.toList());

  switch (view.getViewFn().getMaterialization().getUrn()) {
    case Materializations.ITERABLE_MATERIALIZATION_URN:
      {
        ViewFn<IterableView, T> viewFn = (ViewFn<IterableView, T>) view.getViewFn();
        return viewFn.apply(() -> sideInputForWindow);
      }
    case Materializations.MULTIMAP_MATERIALIZATION_URN:
      {
        ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();
        Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
        return viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) sideInputForWindow));
      }
    default:
      throw new IllegalStateException(String.format("Unknown side input materialization format requested '%s'", view.getViewFn().getMaterialization().getUrn()));
  }
}
use of org.apache.beam.sdk.transforms.ViewFn in project beam by apache.
the class Twister2SideInputReader method getMultimapSideInput.
/**
 * Returns the multimap side-input value of {@code view} for {@code window}.
 *
 * <p>Only the requested window's elements are materialized; other windows returned by
 * {@code getPartitionedElements} are left untouched. When the window has no elements, or the view
 * fn yields {@code null} for them, the view fn is applied to an empty multimap view instead.
 *
 * @param view the multimap-materialized view to read
 * @param window the window whose side-input value is requested
 * @return the value produced by the view's {@link ViewFn} for that window
 */
private <T> T getMultimapSideInput(PCollectionView<T> view, BoundedWindow window) {
  Map<BoundedWindow, List<WindowedValue<?>>> partitionedElements = getPartitionedElements(view);
  ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();

  // Look up just the requested window rather than building a view for every window.
  List<WindowedValue<?>> windowElements = partitionedElements.get(window);
  T result = null;
  if (windowElements != null) {
    Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
    result =
        viewFn.apply(
            InMemoryMultimapSideInputView.fromIterable(
                keyCoder,
                (Iterable)
                    windowElements.stream()
                        .map(WindowedValue::getValue)
                        .collect(Collectors.toList())));
  }
  if (result == null) {
    // No data for this window (or a null view result): fall back to the empty multimap view.
    result = viewFn.apply(InMemoryMultimapSideInputView.empty());
  }
  return result;
}
use of org.apache.beam.sdk.transforms.ViewFn in project beam by apache.
the class ParDoTranslation method viewFnFromProto.
/**
 * Rebuilds a {@link ViewFn} from its proto function spec.
 *
 * <p>Only specs whose URN equals {@code CUSTOM_JAVA_VIEW_FN_URN} are accepted; the spec's
 * parameter is unpacked as a {@code BytesValue} and its bytes are Java-deserialized.
 *
 * @param viewFn the proto wrapper holding the function spec
 * @return the deserialized view fn
 * @throws IllegalArgumentException if the spec's URN is not {@code CUSTOM_JAVA_VIEW_FN_URN}
 * @throws InvalidProtocolBufferException if the parameter cannot be unpacked as a
 *     {@code BytesValue}
 */
private static ViewFn<?, ?> viewFnFromProto(SdkFunctionSpec viewFn) throws InvalidProtocolBufferException {
  FunctionSpec functionSpec = viewFn.getSpec();
  String urn = functionSpec.getUrn();
  // Reject unknown URNs before touching the payload.
  checkArgument(urn.equals(CUSTOM_JAVA_VIEW_FN_URN), "Can't deserialize unknown %s type %s", ViewFn.class.getSimpleName(), urn);

  byte[] serializedViewFn = functionSpec.getParameter().unpack(BytesValue.class).getValue().toByteArray();
  Object deserialized = SerializableUtils.deserializeFromByteArray(serializedViewFn, "Custom ViewFn");
  return (ViewFn<?, ?>) deserialized;
}
use of org.apache.beam.sdk.transforms.ViewFn in project beam by apache.
the class FlinkStreamingPortablePipelineTranslator method getSideInputIdToPCollectionViewMap.
/**
 * Builds a {@link PCollectionView} for every side input declared by the executable stage, keyed
 * by side-input id in the order the stage payload lists them.
 *
 * <p>Each view carries the windowing strategy of the side input's PCollection, that strategy's
 * default window mapping fn, and the collection's windowed-value coder re-wrapped around an
 * iterable of the original value coder.
 *
 * @param stagePayload the stage whose side inputs are translated
 * @param components the pipeline components used to resolve transforms, PCollections, windowing
 *     strategies and coders
 * @return an insertion-ordered map from side-input id to its view
 * @throws IllegalStateException if a side input's windowing strategy cannot be rebuilt from proto
 */
private static LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> getSideInputIdToPCollectionViewMap(RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components) {
  RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components);
  LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> viewsById = new LinkedHashMap<>();

  // Placeholder view fn: present for PCollectionView compatibility only, not used to transform
  // the materialization.
  ViewFn<Iterable<WindowedValue<?>>, ?> viewFn =
      (ViewFn)
          new PCollectionViews.MultimapViewFn<>(
              (PCollectionViews.TypeDescriptorSupplier<Iterable<WindowedValue<Void>>>)
                  () -> TypeDescriptors.iterables(new TypeDescriptor<WindowedValue<Void>>() {}),
              (PCollectionViews.TypeDescriptorSupplier<Void>) TypeDescriptors::voids);

  for (RunnerApi.ExecutableStagePayload.SideInputId sideInputId : stagePayload.getSideInputsList()) {
    // TODO: local name is unique as long as only one transform with side input can be within a
    // stage
    String sideInputTag = sideInputId.getLocalName();
    String collectionId =
        components.getTransformsOrThrow(sideInputId.getTransformId()).getInputsOrThrow(sideInputTag);
    String windowingStrategyId = components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId();
    RunnerApi.WindowingStrategy windowingStrategyProto = components.getWindowingStrategiesOrThrow(windowingStrategyId);
    WindowingStrategy<?, ?> windowingStrategy =
        hydrateSideInputWindowingStrategy(windowingStrategyProto, rehydratedComponents);

    // side input materialization via GBK (T -> Iterable<T>)
    Coder<WindowedValue<Object>> elementCoder = instantiateCoder(collectionId, components);
    WindowedValueCoder rawWindowedCoder = (WindowedValueCoder) elementCoder;
    Coder<WindowedValue<Object>> iterableCoder =
        rawWindowedCoder.withValueCoder(IterableCoder.of(rawWindowedCoder.getValueCoder()));

    // TODO: support custom mapping fn
    viewsById.put(
        sideInputId,
        new RunnerPCollectionView<>(
            null,
            new TupleTag<>(sideInputTag),
            viewFn,
            windowingStrategy.getWindowFn().getDefaultWindowMappingFn(),
            windowingStrategy,
            iterableCoder));
  }
  return viewsById;
}

/** Rebuilds a windowing strategy from proto, rethrowing proto failures as unchecked. */
private static WindowingStrategy<?, ?> hydrateSideInputWindowingStrategy(RunnerApi.WindowingStrategy windowingStrategyProto, RehydratedComponents rehydratedComponents) {
  try {
    return WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalStateException(String.format("Unable to hydrate side input windowing strategy %s.", windowingStrategyProto), e);
  }
}
Aggregations