use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
the class SparkSideInputReader method get.
@Override
@Nullable
public <T> T get(PCollectionView<T> view, BoundedWindow window) {
// --- validate sideInput.
checkNotNull(view, "The PCollectionView passed to sideInput cannot be null ");
KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>> windowedBroadcastHelper = sideInputs.get(view.getTagInternal());
checkNotNull(windowedBroadcastHelper, "SideInput for view " + view + " is not available.");
// --- sideInput window
final BoundedWindow sideInputWindow = view.getWindowMappingFn().getSideInputWindow(window);
// --- match the appropriate sideInput window.
// a tag will point to all matching sideInputs, that is all windows.
// now that we've obtained the appropriate sideInputWindow, all that's left is to filter by it.
Iterable<WindowedValue<?>> availableSideInputs = (Iterable<WindowedValue<?>>) windowedBroadcastHelper.getValue().getValue();
Iterable<?> sideInputForWindow = StreamSupport.stream(availableSideInputs.spliterator(), false).filter(sideInputCandidate -> {
if (sideInputCandidate == null) {
return false;
}
return Iterables.contains(sideInputCandidate.getWindows(), sideInputWindow);
}).collect(Collectors.toList()).stream().map(WindowedValue::getValue).collect(Collectors.toList());
switch(view.getViewFn().getMaterialization().getUrn()) {
case Materializations.ITERABLE_MATERIALIZATION_URN:
{
ViewFn<IterableView, T> viewFn = (ViewFn<IterableView, T>) view.getViewFn();
return viewFn.apply(() -> sideInputForWindow);
}
case Materializations.MULTIMAP_MATERIALIZATION_URN:
{
ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();
Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
return viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) sideInputForWindow));
}
default:
throw new IllegalStateException(String.format("Unknown side input materialization format requested '%s'", view.getViewFn().getMaterialization().getUrn()));
}
}
use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
the class Twister2SideInputReader method getMultimapSideInput.
private <T> T getMultimapSideInput(PCollectionView<T> view, BoundedWindow window) {
Map<BoundedWindow, List<WindowedValue<?>>> partitionedElements = getPartitionedElements(view);
Map<BoundedWindow, T> resultMap = new HashMap<>();
ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();
for (Map.Entry<BoundedWindow, List<WindowedValue<?>>> elements : partitionedElements.entrySet()) {
Coder keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
resultMap.put(elements.getKey(), viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) elements.getValue().stream().map(WindowedValue::getValue).collect(Collectors.toList()))));
}
T result = resultMap.get(window);
if (result == null) {
result = viewFn.apply(InMemoryMultimapSideInputView.empty());
}
return result;
}
use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
the class ParDoTranslation method translateParDo.
/**
* Translate a ParDo.
*/
public static <InputT> ParDoPayload translateParDo(ParDo.MultiOutput<InputT, ?> parDo, PCollection<InputT> mainInput, DoFnSchemaInformation doFnSchemaInformation, Pipeline pipeline, SdkComponents components) throws IOException {
final DoFn<?, ?> doFn = parDo.getFn();
final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
final String restrictionCoderId;
if (signature.processElement().isSplittable()) {
DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(doFn);
final Coder<?> restrictionAndWatermarkStateCoder = KvCoder.of(doFnInvoker.invokeGetRestrictionCoder(pipeline.getCoderRegistry()), doFnInvoker.invokeGetWatermarkEstimatorStateCoder(pipeline.getCoderRegistry()));
restrictionCoderId = components.registerCoder(restrictionAndWatermarkStateCoder);
} else {
restrictionCoderId = "";
}
Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) mainInput.getWindowingStrategy().getWindowFn().windowCoder();
Coder<?> keyCoder;
if (signature.usesState() || signature.usesTimers()) {
checkArgument(mainInput.getCoder() instanceof KvCoder, "DoFn's that use state or timers must have an input PCollection with a KvCoder but received %s", mainInput.getCoder());
keyCoder = ((KvCoder) mainInput.getCoder()).getKeyCoder();
} else {
keyCoder = null;
}
return payloadForParDoLike(new ParDoLike() {
@Override
public FunctionSpec translateDoFn(SdkComponents newComponents) {
return ParDoTranslation.translateDoFn(parDo.getFn(), parDo.getMainOutputTag(), parDo.getSideInputs(), doFnSchemaInformation, newComponents);
}
@Override
public Map<String, SideInput> translateSideInputs(SdkComponents components) {
Map<String, SideInput> sideInputs = new HashMap<>();
for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
sideInputs.put(sideInput.getTagInternal().getId(), translateView(sideInput, components));
}
return sideInputs;
}
@Override
public Map<String, RunnerApi.StateSpec> translateStateSpecs(SdkComponents components) throws IOException {
Map<String, RunnerApi.StateSpec> stateSpecs = new HashMap<>();
for (Map.Entry<String, StateDeclaration> state : signature.stateDeclarations().entrySet()) {
RunnerApi.StateSpec spec = translateStateSpec(getStateSpecOrThrow(state.getValue(), doFn), components);
stateSpecs.put(state.getKey(), spec);
}
return stateSpecs;
}
@Override
public ParDoLikeTimerFamilySpecs translateTimerFamilySpecs(SdkComponents newComponents) {
Map<String, RunnerApi.TimerFamilySpec> timerFamilySpecs = new HashMap<>();
for (Map.Entry<String, TimerDeclaration> timer : signature.timerDeclarations().entrySet()) {
RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(getTimerSpecOrThrow(timer.getValue(), doFn), newComponents, keyCoder, windowCoder);
timerFamilySpecs.put(timer.getKey(), spec);
}
for (Map.Entry<String, DoFnSignature.TimerFamilyDeclaration> timerFamily : signature.timerFamilyDeclarations().entrySet()) {
RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(DoFnSignatures.getTimerFamilySpecOrThrow(timerFamily.getValue(), doFn), newComponents, keyCoder, windowCoder);
timerFamilySpecs.put(timerFamily.getKey(), spec);
}
String onWindowExpirationTimerFamilySpec = null;
if (signature.onWindowExpiration() != null) {
RunnerApi.TimerFamilySpec spec = RunnerApi.TimerFamilySpec.newBuilder().setTimeDomain(translateTimeDomain(TimeDomain.EVENT_TIME)).setTimerFamilyCoderId(registerCoderOrThrow(components, Timer.Coder.of(keyCoder, windowCoder))).build();
for (int i = 0; i < Integer.MAX_VALUE; ++i) {
onWindowExpirationTimerFamilySpec = "onWindowExpiration" + i;
if (!timerFamilySpecs.containsKey(onWindowExpirationTimerFamilySpec)) {
break;
}
}
timerFamilySpecs.put(onWindowExpirationTimerFamilySpec, spec);
}
return ParDoLikeTimerFamilySpecs.create(timerFamilySpecs, onWindowExpirationTimerFamilySpec);
}
@Override
public boolean isStateful() {
return !signature.stateDeclarations().isEmpty() || !signature.timerDeclarations().isEmpty() || !signature.timerFamilyDeclarations().isEmpty() || signature.onWindowExpiration() != null;
}
@Override
public boolean isSplittable() {
return signature.processElement().isSplittable();
}
@Override
public boolean isRequiresStableInput() {
return signature.processElement().requiresStableInput();
}
@Override
public boolean isRequiresTimeSortedInput() {
return signature.processElement().requiresTimeSortedInput();
}
@Override
public boolean requestsFinalization() {
return (signature.startBundle() != null && signature.startBundle().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.processElement() != null && signature.processElement().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.finishBundle() != null && signature.finishBundle().extraParameters().contains(Parameter.bundleFinalizer()));
}
@Override
public String translateRestrictionCoderId(SdkComponents newComponents) {
return restrictionCoderId;
}
}, components);
}
use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
the class PartialGroupByKeyParDoFnsTest method testCreateWithCombinerAndStreamingSideInputs.
@Test
public void testCreateWithCombinerAndStreamingSideInputs() throws Exception {
StreamingOptions options = PipelineOptionsFactory.as(StreamingOptions.class);
options.setStreaming(true);
Coder keyCoder = StringUtf8Coder.of();
Coder valueCoder = BigEndianIntegerCoder.of();
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);
TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)), counterSet, NameContextsForTests.nameContextForTest());
when(mockSideInputReader.isEmpty()).thenReturn(false);
when(mockStreamingStepContext.stateInternals()).thenReturn((StateInternals) mockStateInternals);
when(mockStateInternals.state(Matchers.<StateNamespace>any(), Matchers.<StateTag>any())).thenReturn(mockState);
when(mockState.read()).thenReturn(Maps.newHashMap());
ParDoFn pgbk = PartialGroupByKeyParDoFns.create(options, kvCoder, AppliedCombineFn.withInputCoder(Sum.ofIntegers(), CoderRegistry.createDefault(), kvCoder, ImmutableList.<PCollectionView<?>>of(), WindowingStrategy.globalDefault()), mockSideInputReader, receiver, mockStreamingStepContext);
assertTrue(pgbk instanceof StreamingSideInputPGBKParDoFn);
}
use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
the class ModelCodersTest method kvCoderComponentsToConstructor.
@Test
public void kvCoderComponentsToConstructor() throws IOException {
KvCoder<byte[], Iterable<Long>> javaCoder = KvCoder.of(ByteArrayCoder.of(), IterableCoder.of(LengthPrefixCoder.of(VarLongCoder.of())));
MessageWithComponents coderAndComponents = CoderTranslation.toProto(javaCoder);
KvCoderComponents kvCoderComponents = ModelCoders.getKvCoderComponents(coderAndComponents.getCoder());
Coder kvCoder = ModelCoders.kvCoder(kvCoderComponents.keyCoderId(), kvCoderComponents.valueCoderId());
assertThat(kvCoder, equalTo(coderAndComponents.getCoder()));
}
Aggregations