use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class ProcessBundleBenchmark method testState.
/**
 * Drives the given stateful transform with four single-element bundles and checks the number of
 * values it emits.
 *
 * <p>Every remote output gets a receiver that only counts the values it is handed. The elements
 * {@code "zero"}, {@code "one"}, {@code "two"} and {@code "flush"} are then sent, each in its own
 * bundle, under a key derived from the current thread id (hex, left-padded with {@code '0'} to 16
 * characters). After the last bundle closes, exactly three output values must have been counted;
 * the counter is reset to zero as part of that check.
 *
 * @param statefulTransform supplies the bundle processor and the remote output coders
 * @param stateRequestHandler state handler passed to every bundle
 * @throws Exception propagated from bundle creation, element delivery or bundle close
 */
private static void testState(StatefulTransform statefulTransform, StateRequestHandler stateRequestHandler) throws Exception {
  Map<String, ? super Coder<WindowedValue<?>>> codersByOutput = statefulTransform.descriptor.getRemoteOutputCoders();
  Map<String, RemoteOutputReceiver<?>> receiversByOutput = new HashMap<>();
  AtomicInteger emittedCount = new AtomicInteger();
  for (Entry<String, ? super Coder<WindowedValue<?>>> coderEntry : codersByOutput.entrySet()) {
    // The receiver discards the value itself; only the number of deliveries matters here.
    receiversByOutput.put(
        coderEntry.getKey(),
        RemoteOutputReceiver.of(
            (Coder) coderEntry.getValue(),
            (FnDataReceiver<? super WindowedValue<?>>) (WindowedValue<?> value) -> emittedCount.incrementAndGet()));
  }

  String threadKey = Strings.padStart(Long.toHexString(Thread.currentThread().getId()), 16, '0');

  // One bundle per element, opened and closed in this exact order.
  for (String element : new String[] {"zero", "one", "two", "flush"}) {
    try (RemoteBundle bundle = statefulTransform.processor.newBundle(receiversByOutput, stateRequestHandler, BundleProgressHandler.ignored())) {
      Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow(KV.of(threadKey, element)));
    }
  }

  assertEquals(3, emittedCount.getAndSet(0));
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class SideInputHandler method addSideInputValue.
/**
 * Stores a value for the given side input and records which windows now have data.
 *
 * <p>For each window carried by {@code value}, the value's contents are written to the state cell
 * of {@code sideInput} in that window's namespace, and the window is added to the side input's
 * available-windows state in the global namespace. This might change the result of
 * {@link #isReady(PCollectionView, BoundedWindow)} for that side input.
 */
public void addSideInputValue(PCollectionView<?> sideInput, WindowedValue<Iterable<?>> value) {
  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> coderForWindows = (Coder<BoundedWindow>) sideInput.getWindowingStrategyInternal().getWindowFn().windowCoder();
  StateTag<ValueState<Iterable<?>>> contentsTag = sideInputContentsTags.get(sideInput);
  value.getWindows().forEach(
      window -> {
        // Write the contents first, then publish the window as available.
        stateInternals.state(StateNamespaces.window(coderForWindows, window), contentsTag).write(value.getValue());
        stateInternals.state(StateNamespaces.global(), availableWindowsTags.get(sideInput)).add(window);
      });
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class DoFnOperatorTest method testWatermarkUpdateAfterWatermarkHoldRelease.
/**
 * Checks the sequence of watermark holds the operator reports while elements are processed and
 * the input watermark advances.
 *
 * <p>The operator under test is an anonymous {@code DoFnOperator} subclass that (a) wraps its
 * runner so each element adds a hold at the element timestamp and sets two event-time timers, and
 * (b) records {@code keyedStateInternals.minWatermarkHoldMs()} every time
 * {@code emitWatermarkIfHoldChanged} is invoked. The assertions compare that recorded list with
 * the expected holds after each step.
 */
@Test
public void testWatermarkUpdateAfterWatermarkHoldRelease() throws Exception {
Coder<WindowedValue<KV<String, String>>> coder = WindowedValue.getValueOnlyCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
TupleTag<KV<String, String>> outputTag = new TupleTag<>("main-output");
// Collects every hold value observed through the overridden emitWatermarkIfHoldChanged below.
List<Long> emittedWatermarkHolds = new ArrayList<>();
// Keys the stream by the encoded String key of each KV element.
KeySelector<WindowedValue<KV<String, String>>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue().getKey(), StringUtf8Coder.of());
DoFnOperator<KV<String, String>, KV<String, String>> doFnOperator = new DoFnOperator<KV<String, String>, KV<String, String>>(new IdentityDoFn<>(), "stepName", coder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), WindowingStrategy.globalDefault(), new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
FlinkPipelineOptions.defaults(), StringUtf8Coder.of(), keySelector, DoFnSchemaInformation.create(), Collections.emptyMap()) {
// Wraps the real runner so the test controls the watermark hold and timers directly.
@Override
protected DoFnRunner<KV<String, String>, KV<String, String>> createWrappingDoFnRunner(DoFnRunner<KV<String, String>, KV<String, String>> wrappedRunner, StepContext stepContext) {
// All hold state and timers live in the global window's namespace.
StateNamespace namespace = StateNamespaces.window(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE);
StateTag<WatermarkHoldState> holdTag = StateTags.watermarkStateInternal("hold", TimestampCombiner.LATEST);
WatermarkHoldState holdState = stepContext.stateInternals().state(namespace, holdTag);
TimerInternals timerInternals = stepContext.timerInternals();
return new DoFnRunner<KV<String, String>, KV<String, String>>() {
@Override
public void startBundle() {
wrappedRunner.startBundle();
}
@Override
public void processElement(WindowedValue<KV<String, String>> elem) {
wrappedRunner.processElement(elem);
// Add a hold at the element timestamp.
holdState.add(elem.getTimestamp());
// Event-time timer at (element timestamp + 1 ms), with the same output timestamp.
timerInternals.setTimer(namespace, "timer", "family", elem.getTimestamp().plus(Duration.millis(1)), elem.getTimestamp().plus(Duration.millis(1)), TimeDomain.EVENT_TIME);
// "cleanup" timer at the global window's max timestamp; onTimer clears the hold for it.
timerInternals.setTimer(namespace, "cleanup", "", GlobalWindow.INSTANCE.maxTimestamp(), GlobalWindow.INSTANCE.maxTimestamp(), TimeDomain.EVENT_TIME);
}
// Does not delegate to wrappedRunner: timers here only manipulate the hold.
@Override
public <KeyT> void onTimer(String timerId, String timerFamilyId, KeyT key, BoundedWindow window, Instant timestamp, Instant outputTimestamp, TimeDomain timeDomain) {
if ("cleanup".equals(timerId)) {
holdState.clear();
} else {
// Any other timer adds a hold at its output timestamp.
holdState.add(outputTimestamp);
}
}
@Override
public void finishBundle() {
wrappedRunner.finishBundle();
}
@Override
public <KeyT> void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) {
wrappedRunner.onWindowExpiration(window, timestamp, key);
}
@Override
public DoFn<KV<String, String>, KV<String, String>> getFn() {
return doFn;
}
};
}
// Records the minimum hold read from keyedStateInternals on every invocation; the
// currentWatermarkHold argument is not used.
@Override
void emitWatermarkIfHoldChanged(long currentWatermarkHold) {
emittedWatermarkHolds.add(keyedStateInternals.minWatermarkHoldMs());
}
};
OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<KV<String, String>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.setup();
// Captured once so every expected value below is relative to the same instant.
Instant now = Instant.now();
testHarness.open();
// process first element, set hold to `now', setup timer for `now + 1'
testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now)));
assertThat(emittedWatermarkHolds, is(equalTo(Collections.singletonList(now.getMillis()))));
// advance the watermark to `now + 2'; this is expected to fire the `now + 1' timer, whose
// output timestamp (`now + 1') becomes the newly recorded hold
testHarness.processWatermark(now.getMillis() + 2);
assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1))));
// process second element at `now + 2', verify we emitted changed hold
testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now.plus(Duration.millis(2)))));
assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1, now.getMillis() + 2))));
// advance the watermark past the global window's max timestamp (where the "cleanup" timer
// was set), then to the maximum timestamp, before closing the harness
testHarness.processWatermark(GlobalWindow.INSTANCE.maxTimestamp().plus(Duration.millis(1)).getMillis());
testHarness.processWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
testHarness.close();
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class ParDoTranslation method translateParDo.
/**
 * Translate a ParDo into its {@link ParDoPayload}.
 *
 * <p>For a splittable DoFn, a {@link KvCoder} of the restriction coder and the watermark-estimator
 * state coder is registered in {@code components} and its id is used as the restriction coder id;
 * otherwise the id is the empty string. When the DoFn uses state or timers, the main input must be
 * encoded with a {@link KvCoder}, whose key coder is then used for the timer-family coders.
 *
 * @param parDo the transform to translate
 * @param mainInput the main input; supplies the window coder and, for DoFns that use state or
 *     timers, the key coder
 * @param doFnSchemaInformation forwarded unchanged to the DoFn translation
 * @param pipeline supplies the coder registry used to obtain the splittable-DoFn coders
 * @param components components the restriction coder is registered in; also passed on to
 *     {@code payloadForParDoLike}
 * @return the payload produced by {@code payloadForParDoLike}
 * @throws IOException propagated from coder registration or payload construction
 */
public static <InputT> ParDoPayload translateParDo(ParDo.MultiOutput<InputT, ?> parDo, PCollection<InputT> mainInput, DoFnSchemaInformation doFnSchemaInformation, Pipeline pipeline, SdkComponents components) throws IOException {
  final DoFn<?, ?> doFn = parDo.getFn();
  final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());

  final String restrictionCoderId;
  if (signature.processElement().isSplittable()) {
    DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(doFn);
    final Coder<?> restrictionAndWatermarkStateCoder =
        KvCoder.of(
            doFnInvoker.invokeGetRestrictionCoder(pipeline.getCoderRegistry()),
            doFnInvoker.invokeGetWatermarkEstimatorStateCoder(pipeline.getCoderRegistry()));
    restrictionCoderId = components.registerCoder(restrictionAndWatermarkStateCoder);
  } else {
    restrictionCoderId = "";
  }

  // The window coder's static type is narrower than BoundedWindow; the cast is unchecked.
  @SuppressWarnings("unchecked")
  Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) mainInput.getWindowingStrategy().getWindowFn().windowCoder();

  Coder<?> keyCoder;
  if (signature.usesState() || signature.usesTimers()) {
    checkArgument(mainInput.getCoder() instanceof KvCoder, "DoFn's that use state or timers must have an input PCollection with a KvCoder but received %s", mainInput.getCoder());
    keyCoder = ((KvCoder<?, ?>) mainInput.getCoder()).getKeyCoder();
  } else {
    keyCoder = null;
  }

  return payloadForParDoLike(
      new ParDoLike() {
        @Override
        public FunctionSpec translateDoFn(SdkComponents newComponents) {
          return ParDoTranslation.translateDoFn(parDo.getFn(), parDo.getMainOutputTag(), parDo.getSideInputs(), doFnSchemaInformation, newComponents);
        }

        @Override
        public Map<String, SideInput> translateSideInputs(SdkComponents components) {
          Map<String, SideInput> sideInputs = new HashMap<>();
          for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
            sideInputs.put(sideInput.getTagInternal().getId(), translateView(sideInput, components));
          }
          return sideInputs;
        }

        @Override
        public Map<String, RunnerApi.StateSpec> translateStateSpecs(SdkComponents components) throws IOException {
          Map<String, RunnerApi.StateSpec> stateSpecs = new HashMap<>();
          for (Map.Entry<String, StateDeclaration> state : signature.stateDeclarations().entrySet()) {
            RunnerApi.StateSpec spec = translateStateSpec(getStateSpecOrThrow(state.getValue(), doFn), components);
            stateSpecs.put(state.getKey(), spec);
          }
          return stateSpecs;
        }

        @Override
        public ParDoLikeTimerFamilySpecs translateTimerFamilySpecs(SdkComponents newComponents) {
          Map<String, RunnerApi.TimerFamilySpec> timerFamilySpecs = new HashMap<>();

          for (Map.Entry<String, TimerDeclaration> timer : signature.timerDeclarations().entrySet()) {
            RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(getTimerSpecOrThrow(timer.getValue(), doFn), newComponents, keyCoder, windowCoder);
            timerFamilySpecs.put(timer.getKey(), spec);
          }

          for (Map.Entry<String, DoFnSignature.TimerFamilyDeclaration> timerFamily : signature.timerFamilyDeclarations().entrySet()) {
            RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(DoFnSignatures.getTimerFamilySpecOrThrow(timerFamily.getValue(), doFn), newComponents, keyCoder, windowCoder);
            timerFamilySpecs.put(timerFamily.getKey(), spec);
          }

          String onWindowExpirationTimerFamilySpec = null;
          if (signature.onWindowExpiration() != null) {
            // Register the coder in the components handed to this callback, like the two loops
            // above, rather than in the captured outer `components`.
            RunnerApi.TimerFamilySpec spec =
                RunnerApi.TimerFamilySpec.newBuilder()
                    .setTimeDomain(translateTimeDomain(TimeDomain.EVENT_TIME))
                    .setTimerFamilyCoderId(registerCoderOrThrow(newComponents, Timer.Coder.of(keyCoder, windowCoder)))
                    .build();
            // Pick the first "onWindowExpiration<i>" id not already taken by a declared timer.
            for (int i = 0; i < Integer.MAX_VALUE; ++i) {
              onWindowExpirationTimerFamilySpec = "onWindowExpiration" + i;
              if (!timerFamilySpecs.containsKey(onWindowExpirationTimerFamilySpec)) {
                break;
              }
            }
            timerFamilySpecs.put(onWindowExpirationTimerFamilySpec, spec);
          }

          return ParDoLikeTimerFamilySpecs.create(timerFamilySpecs, onWindowExpirationTimerFamilySpec);
        }

        @Override
        public boolean isStateful() {
          return !signature.stateDeclarations().isEmpty() || !signature.timerDeclarations().isEmpty() || !signature.timerFamilyDeclarations().isEmpty() || signature.onWindowExpiration() != null;
        }

        @Override
        public boolean isSplittable() {
          return signature.processElement().isSplittable();
        }

        @Override
        public boolean isRequiresStableInput() {
          return signature.processElement().requiresStableInput();
        }

        @Override
        public boolean isRequiresTimeSortedInput() {
          return signature.processElement().requiresTimeSortedInput();
        }

        @Override
        public boolean requestsFinalization() {
          return (signature.startBundle() != null && signature.startBundle().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.processElement() != null && signature.processElement().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.finishBundle() != null && signature.finishBundle().extraParameters().contains(Parameter.bundleFinalizer()));
        }

        @Override
        public String translateRestrictionCoderId(SdkComponents newComponents) {
          return restrictionCoderId;
        }
      },
      components);
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class CoderTranslation method fromKnownCoder.
/**
 * Rebuilds a Java {@link Coder} from a proto coder by looking up the translator registered for
 * its URN.
 *
 * <p>Component coders are resolved first, in the order listed in the proto. The translator
 * registered for the coder's URN is then applied to those components, the spec payload bytes and
 * the translation context.
 *
 * @param coder the proto coder to convert
 * @param components source of the component coder protos and, for the default context, of
 *     already-rehydrated coders
 * @param context translation context passed on to nested coders and to the translator
 * @return the coder built by the translator
 * @throws IOException propagated from resolving a component coder
 */
private static Coder<?> fromKnownCoder(RunnerApi.Coder coder, RehydratedComponents components, TranslationContext context) throws IOException {
  String urn = coder.getSpec().getUrn();

  List<Coder<?>> resolvedComponents = new ArrayList<>();
  for (String id : coder.getComponentCoderIdsList()) {
    // Only store coders in RehydratedComponents as long as we are not using a custom
    // translation context.
    Coder<?> resolved;
    if (context == TranslationContext.DEFAULT) {
      resolved = components.getCoder(id);
    } else {
      resolved = fromProto(components.getComponents().getCodersOrThrow(id), components, context);
    }
    resolvedComponents.add(resolved);
  }

  Class<? extends Coder> javaCoderClass = KNOWN_CODER_URNS.inverse().get(urn);
  CoderTranslator<?> urnTranslator = KNOWN_TRANSLATORS.get(javaCoderClass);
  checkArgument(urnTranslator != null, "Unknown Coder URN %s. Known URNs: %s", urn, KNOWN_CODER_URNS.values());

  byte[] payload = coder.getSpec().getPayload().toByteArray();
  return urnTranslator.fromComponents(resolvedComponents, payload, context);
}
Aggregations