Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
From class PartialGroupByKeyParDoFnsTest, method testCreateWithCombinerAndBatchSideInputs.
/**
 * Checks that {@code PartialGroupByKeyParDoFns.create} hands back a
 * {@code BatchSideInputPGBKParDoFn} when it is given a combine fn together with a side input
 * reader whose {@code isEmpty()} is stubbed to return {@code false}.
 */
@Test
public void testCreateWithCombinerAndBatchSideInputs() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  // Parameterized (not raw) coder types, so KvCoder.of(...) below is a checked call.
  Coder<String> keyCoder = StringUtf8Coder.of();
  Coder<Integer> valueCoder = BigEndianIntegerCoder.of();
  KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, valueCoder);
  TestOutputReceiver receiver = new TestOutputReceiver(new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(kvCoder)), counterSet, NameContextsForTests.nameContextForTest());
  StepContext stepContext = BatchModeExecutionContext.forTesting(options, "testStage").getStepContext(TestOperationContext.create(counterSet));
  // The mocked reader reports that side inputs are present.
  when(mockSideInputReader.isEmpty()).thenReturn(false);
  ParDoFn pgbk = PartialGroupByKeyParDoFns.create(options, kvCoder, AppliedCombineFn.withInputCoder(Sum.ofIntegers(), CoderRegistry.createDefault(), kvCoder, ImmutableList.<PCollectionView<?>>of(), WindowingStrategy.globalDefault()), mockSideInputReader, receiver, stepContext);
  assertTrue(pgbk instanceof BatchSideInputPGBKParDoFn);
}
Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
From class UserParDoFnFactoryTest, method testCleanupWorks.
/**
 * Checks that processing a fired cleanup timer for a window clears the state written under that
 * window's namespace while leaving the state of the other window in place.
 *
 * <p>State is pre-populated for two windows; the cleanup timer for each window is then returned
 * by the mocked step context in turn, and the state contents are asserted after each
 * {@code processTimers()} call.
 */
@Test
public void testCleanupWorks() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  CounterSet counters = new CounterSet();
  DoFn<?, ?> initialFn = new TestStatefulDoFn();
  CloudObject cloudObject = getCloudObject(initialFn, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));
  StateInternals stateInternals = InMemoryStateInternals.forKey("dummy");
  // The overarching step context that only ParDoFn gets
  DataflowStepContext stepContext = mock(DataflowStepContext.class);
  // The user step context that the DoFnRunner gets a handle on
  DataflowStepContext userStepContext = mock(DataflowStepContext.class);
  when(stepContext.namespacedToUser()).thenReturn(userStepContext);
  // Both contexts share the same in-memory state so the assertions below can inspect it directly.
  when(stepContext.stateInternals()).thenReturn(stateInternals);
  when(userStepContext.stateInternals()).thenReturn(stateInternals);
  DataflowExecutionContext<DataflowStepContext> executionContext = mock(DataflowExecutionContext.class);
  TestOperationContext operationContext = TestOperationContext.create(counters);
  when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);
  when(executionContext.getSideInputReader(any(), any(), any())).thenReturn(NullSideInputReader.empty());
  ParDoFn parDoFn = factory.create(options, cloudObject, Collections.emptyList(), MAIN_OUTPUT, ImmutableMap.of(MAIN_OUTPUT, 0), executionContext, operationContext);
  Receiver rcvr = new OutputReceiver();
  parDoFn.startBundle(rcvr);
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
  IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
  Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
  StateNamespace firstWindowNamespace = StateNamespaces.window(windowCoder, firstWindow);
  StateNamespace secondWindowNamespace = StateNamespaces.window(windowCoder, secondWindow);
  StateTag<ValueState<String>> tag = StateTags.tagForSpec(TestStatefulDoFn.STATE_ID, StateSpecs.value(StringUtf8Coder.of()));
  // Set up non-empty state. We don't mock + verify calls to clear() but instead
  // check that state is actually empty. We mustn't care how it is accomplished.
  stateInternals.state(firstWindowNamespace, tag).write("first");
  stateInternals.state(secondWindowNamespace, tag).write("second");
  // Each cleanup timer is set one millisecond past the max timestamp of its window.
  Instant firstCleanupTime = firstWindow.maxTimestamp().plus(Duration.millis(1L));
  Instant secondCleanupTime = secondWindow.maxTimestamp().plus(Duration.millis(1L));
  when(userStepContext.getNextFiredTimer(windowCoder)).thenReturn(null);
  when(stepContext.getNextFiredTimer(windowCoder)).thenReturn(TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, firstWindowNamespace, firstCleanupTime, firstCleanupTime, TimeDomain.EVENT_TIME)).thenReturn(null);
  // This should fire the timer to clean up the first window
  parDoFn.processTimers();
  assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
  assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
  when(stepContext.getNextFiredTimer(windowCoder)).thenReturn(TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, secondWindowNamespace, secondCleanupTime, secondCleanupTime, TimeDomain.EVENT_TIME)).thenReturn(null);
  // And this should clean up the second window
  parDoFn.processTimers();
  assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
  assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
From class WindmillTimerInternalsTest, method testTimerDataToFromTimer.
/**
 * Round-trips timers through {@code timerDataToWindmillTimer} and
 * {@code windmillTimerToTimerData} and asserts that the result equals the original timer.
 *
 * <p>Covers every combination of test state family, namespace/coder pair, time domain, namespace
 * prefix and test timestamp, for timers built without an id, with an id, and with an id plus a
 * timer family, each with an output timestamp equal to and one millisecond before the firing
 * timestamp.
 */
@Test
public void testTimerDataToFromTimer() {
  for (String family : TEST_STATE_FAMILIES) {
    for (KV<Coder<? extends BoundedWindow>, StateNamespace> namespaceEntry : TEST_NAMESPACES_WITH_CODERS) {
      @Nullable Coder<? extends BoundedWindow> windowCoder = namespaceEntry.getKey();
      StateNamespace ns = namespaceEntry.getValue();
      for (TimeDomain domain : TimeDomain.values()) {
        for (WindmillNamespacePrefix nsPrefix : WindmillNamespacePrefix.values()) {
          for (Instant fireTime : TEST_TIMESTAMPS) {
            // Output timestamp variant that is strictly earlier than the firing timestamp.
            Instant earlierOutputTime = fireTime.minus(Duration.millis(1));

            // Timers created without a timer id.
            List<TimerData> unnamedTimers = ImmutableList.of(
                TimerData.of(ns, fireTime, fireTime, domain),
                TimerData.of(ns, fireTime, earlierOutputTime, domain));
            for (TimerData original : unnamedTimers) {
              assertThat(WindmillTimerInternals.windmillTimerToTimerData(nsPrefix, WindmillTimerInternals.timerDataToWindmillTimer(family, nsPrefix, original), windowCoder), equalTo(original));
            }

            // Timers created with a timer id, with and without a timer family.
            for (String id : TEST_TIMER_IDS) {
              List<TimerData> namedTimers = ImmutableList.of(
                  TimerData.of(id, ns, fireTime, fireTime, domain),
                  TimerData.of(id, "family", ns, fireTime, fireTime, domain),
                  TimerData.of(id, ns, fireTime, earlierOutputTime, domain),
                  TimerData.of(id, "family", ns, fireTime, earlierOutputTime, domain));
              for (TimerData original : namedTimers) {
                assertThat(WindmillTimerInternals.windmillTimerToTimerData(nsPrefix, WindmillTimerInternals.timerDataToWindmillTimer(family, nsPrefix, original), windowCoder), equalTo(original));
              }
            }
          }
        }
      }
    }
  }
}
Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
From class BatchSideInputHandlerFactory, method forMultimapSideInput.
/**
 * Creates a handler that serves a multimap side input from an in-memory index.
 *
 * <p>The side input elements are fetched once via {@code sideInputGetter} and grouped first by
 * the structural value of each window they belong to and then by the structural value of their
 * key. The returned handler answers lookups against that index.
 *
 * @param transformId id of the transform consuming the side input
 * @param sideInputId local name of the side input within the transform
 * @param elementCoder coder of the key/value elements; supplies the key and value coders
 * @param windowCoder coder used to derive structural values for windows
 * @return a handler backed by the grouped side input data
 * @throws IllegalArgumentException if no collection is registered for the given ids
 */
@Override
public <K, V, W extends BoundedWindow> MultimapSideInputHandler<K, V, W> forMultimapSideInput(String transformId, String sideInputId, KvCoder<K, V> elementCoder, Coder<W> windowCoder) {
  SideInputId lookupId = SideInputId.newBuilder().setTransformId(transformId).setLocalName(sideInputId).build();
  PCollectionNode collectionNode = sideInputToCollection.get(lookupId);
  checkArgument(collectionNode != null, "No side input for %s/%s", transformId, sideInputId);
  Coder<K> keyCoder = elementCoder.getKeyCoder();

  // structural window -> structural key -> (original key, values seen for that key)
  Map<Object, Map<Object, KV<K, List<V>>>> data = new HashMap<>();
  List<WindowedValue<KV<K, V>>> broadcastVariable = sideInputGetter.getSideInput(collectionNode.getId());
  for (WindowedValue<KV<K, V>> element : broadcastVariable) {
    KV<K, V> pair = element.getValue();
    K key = pair.getKey();
    V value = pair.getValue();
    // An element is indexed once under every window it is assigned to.
    for (BoundedWindow untypedWindow : element.getWindows()) {
      @SuppressWarnings("unchecked") W window = (W) untypedWindow;
      Object structuralWindow = windowCoder.structuralValue(window);
      Object structuralKey = keyCoder.structuralValue(key);
      Map<Object, KV<K, List<V>>> perKey = data.computeIfAbsent(structuralWindow, unusedWindow -> new HashMap<>());
      KV<K, List<V>> bucket = perKey.computeIfAbsent(structuralKey, unusedKey -> KV.of(key, new ArrayList<>()));
      bucket.getValue().add(value);
    }
  }

  return new MultimapSideInputHandler<K, V, W>() {
    @Override
    public Coder<K> keyCoder() {
      return elementCoder.getKeyCoder();
    }

    @Override
    public Coder<V> valueCoder() {
      return elementCoder.getValueCoder();
    }

    /** Returns the keys indexed under the given window, as an unmodifiable iterable. */
    @Override
    public Iterable<K> get(W window) {
      Map<Object, KV<K, List<V>>> perKey = data.getOrDefault(windowCoder.structuralValue(window), Collections.emptyMap());
      return Iterables.unmodifiableIterable(FluentIterable.concat(perKey.values()).transform(bucket -> bucket.getKey()));
    }

    /** Returns the values for the key in the window, or an empty list when none are indexed. */
    @Override
    public Iterable<V> get(K key, W window) {
      Map<Object, KV<K, List<V>>> perKey = data.getOrDefault(windowCoder.structuralValue(window), Collections.emptyMap());
      KV<K, List<V>> bucket = perKey.get(keyCoder.structuralValue(key));
      if (bucket != null) {
        return Collections.unmodifiableList(bucket.getValue());
      }
      return Collections.emptyList();
    }
  };
}
Use of org.apache.beam.sdk.coders.Coder in the Apache Beam project.
From class DefaultJobBundleFactory, method getOutputReceivers.
/**
 * Builds one {@code RemoteOutputReceiver} per remote output coder declared by the descriptor.
 *
 * <p>For each output transform, the single input PCollection of that transform is looked up in
 * the process bundle descriptor, a receiver for it is obtained from {@code outputReceiverFactory},
 * and the receiver is paired with the transform's coder.
 *
 * @param processBundleDescriptor descriptor supplying the remote output coders and transforms
 * @param outputReceiverFactory factory creating a data receiver for a PCollection id
 * @return an immutable map from output transform id to its remote output receiver
 */
private static Map<String, RemoteOutputReceiver<?>> getOutputReceivers(ExecutableProcessBundleDescriptor processBundleDescriptor, OutputReceiverFactory outputReceiverFactory) {
  ImmutableMap.Builder<String, RemoteOutputReceiver<?>> receiversByTransform = ImmutableMap.builder();
  for (Map.Entry<String, Coder> coderEntry : processBundleDescriptor.getRemoteOutputCoders().entrySet()) {
    String transformId = coderEntry.getKey();
    // getOnlyElement requires the output transform to have exactly one input PCollection.
    String pCollectionId = Iterables.getOnlyElement(processBundleDescriptor.getProcessBundleDescriptor().getTransformsOrThrow(transformId).getInputsMap().values());
    FnDataReceiver pCollectionReceiver = outputReceiverFactory.create(pCollectionId);
    Coder elementCoder = coderEntry.getValue();
    receiversByTransform.put(transformId, RemoteOutputReceiver.of(elementCoder, pCollectionReceiver));
  }
  return receiversByTransform.build();
}
Aggregations