Use of org.apache.beam.sdk.values.KV in project beam by apache.
Class SparkGroupAlsoByWindowViaWindowSet, method groupAlsoByWindow:
public static <K, InputT, W extends BoundedWindow>
    JavaDStream<WindowedValue<KV<K, Iterable<InputT>>>> groupAlsoByWindow(
        JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>> inputDStream,
        final Coder<K> keyCoder,
        final Coder<WindowedValue<InputT>> wvCoder,
        final WindowingStrategy<?, W> windowingStrategy,
        final SparkRuntimeContext runtimeContext,
        final List<Integer> sourceIds) {
  final IterableCoder<WindowedValue<InputT>> itrWvCoder = IterableCoder.of(wvCoder);
  final Coder<InputT> iCoder = ((FullWindowedValueCoder<InputT>) wvCoder).getValueCoder();
  final Coder<? extends BoundedWindow> wCoder =
      ((FullWindowedValueCoder<InputT>) wvCoder).getWindowCoder();
  final Coder<WindowedValue<KV<K, Iterable<InputT>>>> wvKvIterCoder =
      FullWindowedValueCoder.of(KvCoder.of(keyCoder, IterableCoder.of(iCoder)), wCoder);
  final TimerInternals.TimerDataCoder timerDataCoder =
      TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
  long checkpointDurationMillis =
      runtimeContext.getPipelineOptions().as(SparkPipelineOptions.class)
          .getCheckpointDurationMillis();
  // We have to switch to the Scala API to avoid Optional in the Java API, see: SPARK-4819.
  // The Scala API is also broader here (access to the actual key and the entire iterator).
  // We use coders to convert objects in the PCollection to byte arrays, so they
  // can be transferred over the network for the shuffle and be in serialized form
  // for checkpointing.
  // For readability, we add comments with the actual type next to byte[].
  // To shorten line length, we abbreviate:
  //---- WV: WindowedValue
  //---- Itr: Iterable
  //---- A: AccumT
  //---- I: InputT
  DStream<Tuple2<ByteArray, byte[]>> /*Itr<WV<I>>*/ pairDStream =
      inputDStream.transformToPair(
          new Function<JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>>,
              JavaPairRDD<ByteArray, byte[]>>() {
            // We use mapPartitions with the RDD API because it is the only available API
            // that allows us to preserve partitioning.
            @Override
            public JavaPairRDD<ByteArray, byte[]> call(
                JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>> rdd)
                throws Exception {
              return rdd
                  .mapPartitions(
                      TranslationUtils.functionToFlatMapFunction(
                          WindowingHelpers
                              .<KV<K, Iterable<WindowedValue<InputT>>>>unwindowFunction()),
                      true)
                  .mapPartitionsToPair(
                      TranslationUtils
                          .<K, Iterable<WindowedValue<InputT>>>toPairFlatMapFunction(),
                      true)
                  .mapPartitionsToPair(
                      TranslationUtils.pairFunctionToPairFlatMapFunction(
                          CoderHelpers.toByteFunction(keyCoder, itrWvCoder)),
                      true);
            }
          }).dstream();
  PairDStreamFunctions<ByteArray, byte[]> pairDStreamFunctions =
      DStream.toPairDStreamFunctions(
          pairDStream,
          JavaSparkContext$.MODULE$.<ByteArray>fakeClassTag(),
          JavaSparkContext$.MODULE$.<byte[]>fakeClassTag(),
          null);
  int defaultNumPartitions = pairDStreamFunctions.defaultPartitioner$default$1();
  Partitioner partitioner = pairDStreamFunctions.defaultPartitioner(defaultNumPartitions);
  // use updateStateByKey to scan through the state and update elements and timers.
  DStream<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>> /*WV<KV<K, Itr<I>>>*/
      firedStream = pairDStreamFunctions.updateStateByKey(
          new SerializableFunction1<
              scala.collection.Iterator<Tuple3<ByteArray /*K*/, Seq<byte[]>,
                  Option<Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>>>,
              scala.collection.Iterator<Tuple2<ByteArray /*K*/,
                  Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>>>() {
            @Override
            public scala.collection.Iterator<Tuple2<ByteArray /*K*/,
                Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>> apply(
                    final scala.collection.Iterator<Tuple3<ByteArray /*K*/, Seq<byte[]>,
                        Option<Tuple2<StateAndTimers,
                            List<byte[]> /*WV<KV<K, Itr<I>>>*/>>>> iter) {
              //--- ACTUAL STATEFUL OPERATION:
              //
              // Input Iterator: the partition (~bundle) of a cogrouping of the input
              // and the previous state (if it exists).
              //
              // Output Iterator: the output key, and the updated state.
              //
              // Possible input scenarios for (K, Seq, Option<S>):
              // (1) Option<S>.isEmpty: new data with no previous state.
              // (2) Seq.isEmpty: no new data, but evaluating previous state (timer-like
              //     behaviour).
              // (3) Seq.nonEmpty && Option<S>.isDefined: new data with previous state.
              final SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, W> reduceFn =
                  SystemReduceFn.buffering(
                      ((FullWindowedValueCoder<InputT>) wvCoder).getValueCoder());
              final OutputWindowedValueHolder<K, InputT> outputHolder =
                  new OutputWindowedValueHolder<>();
              // Use in-memory metric cells, since Spark Accumulators are not resilient inside
              // stateful operators; their values are read and logged once this partition is
              // done.
              final MetricsContainerImpl cellProvider = new MetricsContainerImpl("cellProvider");
              final CounterCell droppedDueToClosedWindow = cellProvider.getCounter(
                  MetricName.named(
                      SparkGroupAlsoByWindowViaWindowSet.class,
                      GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_CLOSED_WINDOW_COUNTER));
              final CounterCell droppedDueToLateness = cellProvider.getCounter(
                  MetricName.named(
                      SparkGroupAlsoByWindowViaWindowSet.class,
                      GroupAlsoByWindowsAggregators.DROPPED_DUE_TO_LATENESS_COUNTER));
              AbstractIterator<Tuple2<ByteArray /*K*/,
                      Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>>
                  outIter = new AbstractIterator<Tuple2<ByteArray /*K*/,
                      Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>>() {
                    @Override
                    protected Tuple2<ByteArray /*K*/,
                        Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>
                        computeNext() {
                      // (possibly) previous state and (possibly) new data.
                      while (iter.hasNext()) {
                        // for each element in the partition:
                        Tuple3<ByteArray, Seq<byte[]>,
                            Option<Tuple2<StateAndTimers, List<byte[]>>>> next = iter.next();
                        ByteArray encodedKey = next._1();
                        K key = CoderHelpers.fromByteArray(encodedKey.getValue(), keyCoder);
                        Seq<byte[]> seq = next._2();
                        Option<Tuple2<StateAndTimers, List<byte[]>>> prevStateAndTimersOpt =
                            next._3();
                        SparkStateInternals<K> stateInternals;
                        SparkTimerInternals timerInternals =
                            SparkTimerInternals.forStreamFromSources(
                                sourceIds, GlobalWatermarkHolder.get());
                        // get the state internals for this key.
                        if (prevStateAndTimersOpt.isEmpty()) {
                          // no previous state.
                          stateInternals = SparkStateInternals.forKey(key);
                        } else {
                          // with pre-existing state.
                          StateAndTimers prevStateAndTimers = prevStateAndTimersOpt.get()._1();
                          stateInternals = SparkStateInternals.forKeyAndState(
                              key, prevStateAndTimers.getState());
                          Collection<byte[]> serTimers = prevStateAndTimers.getTimers();
                          timerInternals.addTimers(
                              SparkTimerInternals.deserializeTimers(serTimers, timerDataCoder));
                        }
                        ReduceFnRunner<K, InputT, Iterable<InputT>, W> reduceFnRunner =
                            new ReduceFnRunner<>(
                                key,
                                windowingStrategy,
                                ExecutableTriggerStateMachine.create(
                                    TriggerStateMachines.stateMachineForTrigger(
                                        TriggerTranslation.toProto(
                                            windowingStrategy.getTrigger()))),
                                stateInternals,
                                timerInternals,
                                outputHolder,
                                new UnsupportedSideInputReader("GroupAlsoByWindow"),
                                reduceFn,
                                runtimeContext.getPipelineOptions());
                        // clear before potential use.
                        outputHolder.clear();
                        if (!seq.isEmpty()) {
                          // new input for this key.
                          try {
                            Iterable<WindowedValue<InputT>> elementsIterable =
                                CoderHelpers.fromByteArray(seq.head(), itrWvCoder);
                            Iterable<WindowedValue<InputT>> validElements =
                                LateDataUtils.dropExpiredWindows(
                                    key, elementsIterable, timerInternals, windowingStrategy,
                                    droppedDueToLateness);
                            reduceFnRunner.processElements(validElements);
                          } catch (Exception e) {
                            throw new RuntimeException(
                                "Failed to process element with ReduceFnRunner", e);
                          }
                        } else if (stateInternals.getState().isEmpty()) {
                          // no input and no state -> evict now as garbage collection.
                          continue;
                        }
                        try {
                          // advance the watermark to the high-water mark so timers can fire.
                          timerInternals.advanceWatermark();
                          // call on timers that are ready to process.
                          reduceFnRunner.onTimers(timerInternals.getTimersReadyToProcess());
                        } catch (Exception e) {
                          throw new RuntimeException(
                              "Failed to process ReduceFnRunner onTimer.", e);
                        }
                        // this is mostly symbolic, since the actual persisting is done by
                        // emitting output.
                        reduceFnRunner.persist();
                        // obtain the output, if anything fired.
                        List<WindowedValue<KV<K, Iterable<InputT>>>> outputs =
                            outputHolder.get();
                        if (!outputs.isEmpty() || !stateInternals.getState().isEmpty()) {
                          StateAndTimers updated = new StateAndTimers(
                              stateInternals.getState(),
                              SparkTimerInternals.serializeTimers(
                                  timerInternals.getTimers(), timerDataCoder));
                          // persist Spark's state by outputting it.
                          List<byte[]> serOutput =
                              CoderHelpers.toByteArrays(outputs, wvKvIterCoder);
                          return new Tuple2<>(encodedKey, new Tuple2<>(updated, serOutput));
                        }
                        // an empty state with no output can be evicted completely - do nothing.
                      }
                      return endOfData();
                    }
                  };
              // log if there's something to log.
              long lateDropped = droppedDueToLateness.getCumulative();
              if (lateDropped > 0) {
                LOG.info(String.format("Dropped %d elements due to lateness.", lateDropped));
                droppedDueToLateness.inc(-droppedDueToLateness.getCumulative());
              }
              long closedWindowDropped = droppedDueToClosedWindow.getCumulative();
              if (closedWindowDropped > 0) {
                LOG.info(String.format(
                    "Dropped %d elements due to closed window.", closedWindowDropped));
                droppedDueToClosedWindow.inc(-droppedDueToClosedWindow.getCumulative());
              }
              return scala.collection.JavaConversions.asScalaIterator(outIter);
            }
          },
          partitioner,
          true,
          JavaSparkContext$.MODULE$.<Tuple2<StateAndTimers, List<byte[]>>>fakeClassTag());
  if (checkpointDurationMillis > 0) {
    firedStream.checkpoint(new Duration(checkpointDurationMillis));
  }
  // go back to the Java API now.
  JavaPairDStream<ByteArray, Tuple2<StateAndTimers, List<byte[]>>> /*WV<KV<K, Itr<I>>>*/
      javaFiredStream = JavaPairDStream.fromPairDStream(
          firedStream,
          JavaSparkContext$.MODULE$.<ByteArray>fakeClassTag(),
          JavaSparkContext$.MODULE$.<Tuple2<StateAndTimers, List<byte[]>>>fakeClassTag());
  // filter out state-only output (nothing to fire) and remove the state from the output.
  return javaFiredStream
      .filter(
          new Function<Tuple2<ByteArray /*K*/,
              Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>, Boolean>() {
            @Override
            public Boolean call(
                Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>> t2) throws Exception {
              // keep this entry only if it carries fired (non-empty) output.
              return !t2._2()._2().isEmpty();
            }
          })
      .flatMap(
          new FlatMapFunction<Tuple2<ByteArray /*K*/,
              Tuple2<StateAndTimers, List<byte[]> /*WV<KV<K, Itr<I>>>*/>>,
              WindowedValue<KV<K, Iterable<InputT>>>>() {
            @Override
            public Iterable<WindowedValue<KV<K, Iterable<InputT>>>> call(
                Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>> t2) throws Exception {
              // decode the fired output from its serialized form.
              return CoderHelpers.fromByteArrays(t2._2()._2(), wvKvIterCoder);
            }
          });
}
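The round trip through CoderHelpers above is what lets typed elements cross Spark's shuffle and live in checkpoints as plain bytes. A minimal sketch of that pattern, using a hypothetical String key (StringUtf8Coder is a stock Beam coder; the key value is invented for illustration):

// Minimal sketch of the coder round trip used above; the key value is hypothetical.
Coder<String> keyCoder = StringUtf8Coder.of();
// encode: byte-level equality makes the key usable for Spark's hash partitioning.
byte[] keyBytes = CoderHelpers.toByteArray("user-42", keyCoder);
// decode: restore the typed value on the other side of the shuffle.
String key = CoderHelpers.fromByteArray(keyBytes, keyCoder);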
Use of org.apache.beam.sdk.values.KV in project beam by apache.
Class DataflowGroupByKeyTest, method testGroupByKeyServiceUnbounded:
@Test
public void testGroupByKeyServiceUnbounded() {
  Pipeline p = createTestServiceRunner();
  PCollection<KV<String, Integer>> input =
      p.apply(new PTransform<PBegin, PCollection<KV<String, Integer>>>() {
        @Override
        public PCollection<KV<String, Integer>> expand(PBegin input) {
          return PCollection.<KV<String, Integer>>createPrimitiveOutputInternal(
                  input.getPipeline(),
                  WindowingStrategy.globalDefault(),
                  PCollection.IsBounded.UNBOUNDED)
              .setTypeDescriptor(new TypeDescriptor<KV<String, Integer>>() {});
        }
      });
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(
      "GroupByKey cannot be applied to non-bounded PCollection in the GlobalWindow without "
          + "a trigger. Use a Window.into or Window.triggering transform prior to GroupByKey.");
  input.apply("GroupByKey", GroupByKey.<String, Integer>create());
}
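The expected message also names the fix: window (or trigger) the unbounded input before grouping. A hedged sketch of that repair, assuming the same input PCollection as in the test; the one-minute fixed-window size is an arbitrary choice for illustration (Window and FixedWindows come from org.apache.beam.sdk.transforms.windowing, Duration from joda-time):

// Sketch of the fix the error message suggests; the window size is arbitrary.
PCollection<KV<String, Iterable<Integer>>> grouped =
    input
        .apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.standardMinutes(1))))
        .apply(GroupByKey.<String, Integer>create());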
Use of org.apache.beam.sdk.values.KV in project beam by apache.
Class BatchLoads, method expand:
@Override
public WriteResult expand(PCollection<KV<DestinationT, TableRow>> input) {
  Pipeline p = input.getPipeline();
  final String stepUuid = BigQueryHelpers.randomUUIDString();
  PCollectionView<String> tempFilePrefix =
      p.apply("Create", Create.of((Void) null))
          .apply("GetTempFilePrefix", ParDo.of(new DoFn<Void, String>() {
            @ProcessElement
            public void getTempFilePrefix(ProcessContext c) {
              c.output(resolveTempLocation(
                  c.getPipelineOptions().getTempLocation(), "BigQueryWriteTemp", stepUuid));
            }
          }))
          .apply("TempFilePrefixView", View.<String>asSingleton());
  // Create a singleton job ID token at execution time. This will be used as the base for all
  // load jobs issued from this instance of the transform.
  PCollectionView<String> jobIdTokenView =
      p.apply("TriggerIdCreation", Create.of("ignored"))
          .apply("CreateJobId", MapElements.via(new SimpleFunction<String, String>() {
            @Override
            public String apply(String input) {
              return stepUuid;
            }
          }))
          .apply(View.<String>asSingleton());
  PCollection<KV<DestinationT, TableRow>> inputInGlobalWindow =
      input.apply(
          "rewindowIntoGlobal",
          Window.<KV<DestinationT, TableRow>>into(new GlobalWindows())
              .triggering(DefaultTrigger.of())
              .discardingFiredPanes());
  PCollectionView<Map<DestinationT, String>> schemasView =
      inputInGlobalWindow.apply(new CalculateSchemas<>(dynamicDestinations));
  TupleTag<WriteBundlesToFiles.Result<DestinationT>> writtenFilesTag =
      new TupleTag<WriteBundlesToFiles.Result<DestinationT>>("writtenFiles") {};
  TupleTag<KV<ShardedKey<DestinationT>, TableRow>> unwrittedRecordsTag =
      new TupleTag<KV<ShardedKey<DestinationT>, TableRow>>("unwrittenRecords") {};
  PCollectionTuple writeBundlesTuple =
      inputInGlobalWindow.apply(
          "WriteBundlesToFiles",
          ParDo.of(new WriteBundlesToFiles<>(
                  stepUuid, unwrittedRecordsTag, maxNumWritersPerBundle, maxFileSize))
              .withOutputTags(writtenFilesTag, TupleTagList.of(unwrittedRecordsTag)));
  PCollection<WriteBundlesToFiles.Result<DestinationT>> writtenFiles =
      writeBundlesTuple
          .get(writtenFilesTag)
          .setCoder(WriteBundlesToFiles.ResultCoder.of(destinationCoder));
  // If the bundles contain too many output tables to be written inline to files (due to memory
  // limits), any unwritten records will be spilled to the unwrittenRecordsTag PCollection.
  // Group these records by key, and write the files after grouping. Since the records are
  // grouped by key, only one file needs to be open at a time in each bundle.
  PCollection<WriteBundlesToFiles.Result<DestinationT>> writtenFilesGrouped =
      writeBundlesTuple
          .get(unwrittedRecordsTag)
          .setCoder(KvCoder.of(ShardedKeyCoder.of(destinationCoder), TableRowJsonCoder.of()))
          .apply(GroupByKey.<ShardedKey<DestinationT>, TableRow>create())
          .apply(
              ParDo.of(new WriteGroupedRecordsToFiles<DestinationT>(tempFilePrefix, maxFileSize))
                  .withSideInputs(tempFilePrefix))
          .setCoder(WriteBundlesToFiles.ResultCoder.of(destinationCoder));
  // PCollection of filename, file byte size, and table destination.
  PCollection<WriteBundlesToFiles.Result<DestinationT>> results =
      PCollectionList.of(writtenFiles)
          .and(writtenFilesGrouped)
          .apply(Flatten.<Result<DestinationT>>pCollections());
  TupleTag<KV<ShardedKey<DestinationT>, List<String>>> multiPartitionsTag =
      new TupleTag<KV<ShardedKey<DestinationT>, List<String>>>("multiPartitionsTag") {};
  TupleTag<KV<ShardedKey<DestinationT>, List<String>>> singlePartitionTag =
      new TupleTag<KV<ShardedKey<DestinationT>, List<String>>>("singlePartitionTag") {};
  // Turn the list of files and record counts into a PCollectionView that can be used as a
  // side input.
  PCollectionView<Iterable<WriteBundlesToFiles.Result<DestinationT>>> resultsView =
      results.apply("ResultsView", View.<WriteBundlesToFiles.Result<DestinationT>>asIterable());
  // This transform will look at the set of files written for each table, and if any table has
  // too many files or bytes, will partition that table's files into multiple partitions for
  // loading.
  PCollection<Void> singleton =
      p.apply("singleton", Create.of((Void) null).withCoder(VoidCoder.of()));
  PCollectionTuple partitions =
      singleton.apply(
          "WritePartition",
          ParDo.of(new WritePartition<>(
                  singletonTable,
                  tempFilePrefix,
                  resultsView,
                  multiPartitionsTag,
                  singlePartitionTag))
              .withSideInputs(tempFilePrefix, resultsView)
              .withOutputTags(multiPartitionsTag, TupleTagList.of(singlePartitionTag)));
  List<PCollectionView<?>> writeTablesSideInputs =
      Lists.newArrayList(jobIdTokenView, schemasView);
  writeTablesSideInputs.addAll(dynamicDestinations.getSideInputs());
  Coder<KV<ShardedKey<DestinationT>, List<String>>> partitionsCoder =
      KvCoder.of(
          ShardedKeyCoder.of(NullableCoder.of(destinationCoder)),
          ListCoder.of(StringUtf8Coder.of()));
  // If WriteBundlesToFiles produced more than MAX_NUM_FILES files or MAX_SIZE_BYTES bytes, then
  // the import needs to be split into multiple partitions, and those partitions will be
  // specified in multiPartitionsTag.
  PCollection<KV<TableDestination, String>> tempTables =
      partitions
          .get(multiPartitionsTag)
          .setCoder(partitionsCoder)
          .apply(
              "MultiPartitionsReshuffle",
              Reshuffle.<ShardedKey<DestinationT>, List<String>>of())
          .apply(
              "MultiPartitionsWriteTables",
              ParDo.of(new WriteTables<>(
                      false,
                      bigQueryServices,
                      jobIdTokenView,
                      schemasView,
                      WriteDisposition.WRITE_EMPTY,
                      CreateDisposition.CREATE_IF_NEEDED,
                      dynamicDestinations))
                  .withSideInputs(writeTablesSideInputs));
  // This view maps each final table destination to the set of temporary partitioned tables
  // the PCollection was loaded into.
  PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView =
      tempTables.apply("TempTablesView", View.<TableDestination, String>asMultimap());
  singleton.apply(
      "WriteRename",
      ParDo.of(new WriteRename(
              bigQueryServices, jobIdTokenView, writeDisposition, createDisposition,
              tempTablesView))
          .withSideInputs(tempTablesView, jobIdTokenView));
  // Write the single partition directly to the final table.
  partitions
      .get(singlePartitionTag)
      .setCoder(partitionsCoder)
      .apply(
          "SinglePartitionsReshuffle",
          Reshuffle.<ShardedKey<DestinationT>, List<String>>of())
      .apply(
          "SinglePartitionWriteTables",
          ParDo.of(new WriteTables<>(
                  true,
                  bigQueryServices,
                  jobIdTokenView,
                  schemasView,
                  writeDisposition,
                  createDisposition,
                  dynamicDestinations))
              .withSideInputs(writeTablesSideInputs));
  PCollection<TableRow> empty =
      p.apply("CreateEmptyFailedInserts", Create.empty(TypeDescriptor.of(TableRow.class)));
  return WriteResult.in(input.getPipeline(), new TupleTag<TableRow>("failedInserts"), empty);
}
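BatchLoads routes records to multiple outputs with TupleTags and withOutputTags, first for written-vs-spilled files and again for multi-vs-single partitions. A minimal, self-contained sketch of that multi-output ParDo pattern; the tag names, the 100-character threshold, and the input PCollection<String> named lines are all invented for illustration:

// Sketch of the multi-output ParDo pattern used above (hypothetical tags/threshold).
final TupleTag<String> shortTag = new TupleTag<String>("short") {};
final TupleTag<String> longTag = new TupleTag<String>("long") {};
PCollectionTuple routed =
    lines.apply(
        ParDo.of(new DoFn<String, String>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                if (c.element().length() <= 100) {
                  c.output(c.element());            // main output, tagged shortTag
                } else {
                  c.output(longTag, c.element());   // additional output, tagged longTag
                }
              }
            })
            .withOutputTags(shortTag, TupleTagList.of(longTag)));
PCollection<String> shortLines = routed.get(shortTag);
PCollection<String> longLines = routed.get(longTag);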
Use of org.apache.beam.sdk.values.KV in project beam by apache.
Class StatefulDoFnRunnerTest, method testGarbageCollect:
@Test
public void testGarbageCollect() throws Exception {
  timerInternals.advanceInputWatermark(new Instant(1L));
  MyDoFn fn = new MyDoFn();
  StateTag<ValueState<Integer>> stateTag = StateTags.tagForSpec(fn.stateId, fn.intState);
  DoFnRunner<KV<String, Integer>, Integer> runner =
      DoFnRunners.defaultStatefulDoFnRunner(
          fn,
          getDoFnRunner(fn),
          WINDOWING_STRATEGY,
          new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
          new StatefulDoFnRunner.StateInternalsStateCleaner<>(
              fn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));
  Instant elementTime = new Instant(1);
  // first element, key is hello, WINDOW_1
  runner.processElement(
      WindowedValue.of(KV.of("hello", 1), elementTime, WINDOW_1, PaneInfo.NO_FIRING));
  assertEquals(1, (int) stateInternals.state(windowNamespace(WINDOW_1), stateTag).read());
  // second element, key is hello, WINDOW_2
  runner.processElement(
      WindowedValue.of(
          KV.of("hello", 1), elementTime.plus(WINDOW_SIZE), WINDOW_2, PaneInfo.NO_FIRING));
  runner.processElement(
      WindowedValue.of(
          KV.of("hello", 1), elementTime.plus(WINDOW_SIZE), WINDOW_2, PaneInfo.NO_FIRING));
  assertEquals(2, (int) stateInternals.state(windowNamespace(WINDOW_2), stateTag).read());
  // advance the watermark past the end of WINDOW_1 + allowed lateness.
  // the cleanup timer is set to window.maxTimestamp() + allowed lateness + 1
  // to ensure that state is still available when a user timer for window.maxTimestamp() fires.
  advanceInputWatermark(
      timerInternals,
      WINDOW_1.maxTimestamp()
          .plus(ALLOWED_LATENESS)
          .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
          // plus one, so the watermark is past the GC horizon, not on it.
          .plus(1),
      runner);
  assertTrue(
      stateInternals.isEmptyForTesting(
          stateInternals.state(windowNamespace(WINDOW_1), stateTag)));
  assertEquals(2, (int) stateInternals.state(windowNamespace(WINDOW_2), stateTag).read());
  // advance the watermark past the end of WINDOW_2 + allowed lateness.
  advanceInputWatermark(
      timerInternals,
      WINDOW_2.maxTimestamp()
          .plus(ALLOWED_LATENESS)
          .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
          // plus one, so the watermark is past the GC horizon, not on it.
          .plus(1),
      runner);
  assertTrue(
      stateInternals.isEmptyForTesting(
          stateInternals.state(windowNamespace(WINDOW_2), stateTag)));
}
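A sketch of the cleanup-time arithmetic this test exercises: state for a window is eligible for garbage collection only once the input watermark passes window.maxTimestamp() + allowedLateness + GC_DELAY_MS. The window bounds and lateness value below are invented for illustration:

// Hypothetical window and lateness; the GC_DELAY_MS constant is the one used in the test.
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10000));
Duration allowedLateness = Duration.standardSeconds(5);
Instant gcTime =
    window.maxTimestamp()
        .plus(allowedLateness)
        .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS);
// Advancing the watermark to gcTime.plus(1) moves it past the GC horizon, as the test does.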
Use of org.apache.beam.sdk.values.KV in project beam by apache.
Class StatefulDoFnRunnerTest, method testLateDropping:
@Test
public void testLateDropping() throws Exception {
  MetricsContainerImpl container = new MetricsContainerImpl("any");
  MetricsEnvironment.setCurrentContainer(container);
  timerInternals.advanceInputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
  timerInternals.advanceOutputWatermark(new Instant(BoundedWindow.TIMESTAMP_MAX_VALUE));
  DoFn<KV<String, Integer>, Integer> fn = new MyDoFn();
  DoFnRunner<KV<String, Integer>, Integer> runner =
      DoFnRunners.defaultStatefulDoFnRunner(
          fn,
          getDoFnRunner(fn),
          WINDOWING_STRATEGY,
          new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
          new StatefulDoFnRunner.StateInternalsStateCleaner<>(
              fn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));
  runner.startBundle();
  IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(0L + WINDOW_SIZE));
  Instant timestamp = new Instant(0);
  runner.processElement(
      WindowedValue.of(KV.of("hello", 1), timestamp, window, PaneInfo.NO_FIRING));
  long droppedValues =
      container
          .getCounter(
              MetricName.named(
                  StatefulDoFnRunner.class, StatefulDoFnRunner.DROPPED_DUE_TO_LATENESS_COUNTER))
          .getCumulative()
          .longValue();
  assertEquals(1L, droppedValues);
  runner.finishBundle();
}
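A sketch of the lateness decision the test relies on: with the input watermark at the end of time, every window's GC horizon has already passed, so the element is counted as dropped due to lateness. The window and lateness values here are invented for illustration:

// Hypothetical window and lateness, checked against a maxed-out watermark.
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10000));
Duration allowedLateness = Duration.ZERO;
Instant inputWatermark = BoundedWindow.TIMESTAMP_MAX_VALUE;
boolean expired = window.maxTimestamp().plus(allowedLateness).isBefore(inputWatermark);
// expired == true here, so a stateful runner drops the element and bumps the counter.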